##// END OF EJS Templates
delta-find: use a single snapshot cache when applying a group to an object...
marmoute -
r50576:c261a628 default
parent child Browse files
Show More
@@ -1,1472 +1,1476 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """Minimalist in-memory stand-in for a revlog, used by the doctests.

    ``data`` holds cumulative payload boundaries: entry ``i`` is the end
    offset of revision ``i``, so revision sizes are the differences between
    consecutive entries.
    """

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # nullrev and the first revision both start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always treated as a (virtual, empty) snapshot
        return rev == nullrev or rev in self._snapshot
78
78
79
79
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size the revlog is tuned for
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # the index may provide its own implementation; otherwise fall back
        # to the pure Python one defined in this module
        def densityslicing(relevant, density, mingap):
            return _slicechunktodensity(revlog, relevant, density, mingap)

    density_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # each density-based chunk may still exceed the read-size target
        yield from _slicechunktosize(revlog, chunk, targetsize)
152
152
153
153
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    chunk_start = revlog.start(revs[0])
    data_end = revlog.end(revs[-1])
    if targetsize is None or (data_end - chunk_start) <= targetsize:
        # the whole span already fits within the limit, nothing to do
        yield revs
        return

    first_idx = 0
    stop_idx = 1
    walker = enumerate(revs)
    next(walker)  # skip first rev.
    # first step: get snapshots out of the way
    for pos, rev in walker:
        read_span = revlog.end(rev) - chunk_start
        is_snap = revlog.issnapshot(rev)
        if read_span <= targetsize and is_snap:
            # still within budget, extend the current chunk
            stop_idx = pos + 1
        else:
            emitted = _trimchunk(revlog, revs, first_idx, stop_idx)
            if emitted:
                yield emitted
            first_idx = pos
            chunk_start = revlog.start(rev)
            stop_idx = pos + 1
        if not is_snap:
            # snapshots only live at the head of the chunk; stop phase one
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    total = len(revs)
    while (data_end - chunk_start) > targetsize:
        stop_idx = total
        if total - first_idx <= 1:
            break  # protect against individual chunk larger than limit
        probe_end = revlog.end(revs[stop_idx - 1])
        read_span = probe_end - chunk_start
        while read_span > targetsize:
            if stop_idx - first_idx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            stop_idx -= (stop_idx - first_idx) // 2
            probe_end = revlog.end(revs[stop_idx - 1])
            read_span = probe_end - chunk_start
        emitted = _trimchunk(revlog, revs, first_idx, stop_idx)
        if emitted:
            yield emitted
        first_idx = stop_idx
        chunk_start = revlog.start(revs[first_idx])

    emitted = _trimchunk(revlog, revs, first_idx)
    if emitted:
        yield emitted
307
307
308
308
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        # a single revision cannot be sliced any further
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        # the whole read is below the minimal gap size, never slice it
        yield revs
        return

    read_bytes = deltachainspan
    payload = sum(length(r) for r in revs)

    if deltachainspan:
        density = payload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        # already dense enough, no slicing needed
        yield revs
        return

    # record every hole between consecutive non-empty revisions
    gaps = []
    prev_end = None
    for i, rev in enumerate(revs):
        rev_start = start(rev)
        rev_len = length(rev)

        # Skip empty revisions to form larger holes
        if rev_len == 0:
            continue

        if prev_end is not None:
            hole = rev_start - prev_end
            # only consider holes that are large enough
            if hole > mingapsize:
                gaps.append((hole, i))

        prev_end = rev_start + rev_len
    # sort ascending so popping from the end goes largest-to-smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        hole, gap_idx = gaps.pop()

        selected.append(gap_idx)

        # skipping this hole shrinks the amount of data actually read
        read_bytes -= hole
        if read_bytes > 0:
            density = payload / float(read_bytes)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    prev_idx = 0
    for cut in selected:
        piece = _trimchunk(revlog, revs, prev_idx, cut)
        if piece:
            yield piece

        prev_idx = cut

    piece = _trimchunk(revlog, revs, prev_idx)
    if piece:
        yield piece
433
433
434
434
435 def _trimchunk(revlog, revs, startidx, endidx=None):
435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 """returns revs[startidx:endidx] without empty trailing revs
436 """returns revs[startidx:endidx] without empty trailing revs
437
437
438 Doctest Setup
438 Doctest Setup
439 >>> revlog = _testrevlog([
439 >>> revlog = _testrevlog([
440 ... 5, #0
440 ... 5, #0
441 ... 10, #1
441 ... 10, #1
442 ... 12, #2
442 ... 12, #2
443 ... 12, #3 (empty)
443 ... 12, #3 (empty)
444 ... 17, #4
444 ... 17, #4
445 ... 21, #5
445 ... 21, #5
446 ... 21, #6 (empty)
446 ... 21, #6 (empty)
447 ... ])
447 ... ])
448
448
449 Contiguous cases:
449 Contiguous cases:
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 [0, 1, 2, 3, 4, 5]
451 [0, 1, 2, 3, 4, 5]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 [0, 1, 2, 3, 4]
453 [0, 1, 2, 3, 4]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 [0, 1, 2]
455 [0, 1, 2]
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 [2]
457 [2]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 [3, 4, 5]
459 [3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 [3, 4]
461 [3, 4]
462
462
463 Discontiguous cases:
463 Discontiguous cases:
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 [1, 3, 5]
465 [1, 3, 5]
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 [1]
467 [1]
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 [3, 5]
469 [3, 5]
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 [3, 5]
471 [3, 5]
472 """
472 """
473 length = revlog.length
473 length = revlog.length
474
474
475 if endidx is None:
475 if endidx is None:
476 endidx = len(revs)
476 endidx = len(revs)
477
477
478 # If we have a non-emtpy delta candidate, there are nothing to trim
478 # If we have a non-emtpy delta candidate, there are nothing to trim
479 if revs[endidx - 1] < len(revlog):
479 if revs[endidx - 1] < len(revlog):
480 # Trim empty revs at the end, except the very first revision of a chain
480 # Trim empty revs at the end, except the very first revision of a chain
481 while (
481 while (
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 ):
483 ):
484 endidx -= 1
484 endidx -= 1
485
485
486 return revs[startidx:endidx]
486 return revs[startidx:endidx]
487
487
488
488
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # distance from the start of the first revision to the end of the last
    return revlog.end(revs[-1]) - revlog.start(revs[0])
517
517
518
518
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    header_len = struct.calcsize(b">lll")
    full_replace = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_len
    )
    if delta[:header_len] == full_replace:
        fulltext = delta[header_len:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
548
548
549
549
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing a candidate delta for a revision.
    #
    # distance from the delta base to the target revision's start; bounds
    # the amount of revlog data a reader may have to span (I/O cost)
    distance = attr.ib()
    # size of the delta payload as stored
    deltalen = attr.ib()
    # (compression-header, payload) pair; a b'u' header means the payload
    # is stored uncompressed (see drop_u_compression)
    data = attr.ib()
    # revision number used as the delta base
    base = attr.ib()
    # first revision of the resulting delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain (CPU cost to
    # restore the full text)
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta would be an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
560
560
561
561
def drop_u_compression(delta):
    """Strip the b'u' (no-compression) marker from a delta's payload.

    Returns a new ``_deltainfo`` whose data carries an empty compression
    header instead of the explicit b'u' marker, shrinking the stored
    length by one byte. This is useful for revlog formats that have a
    better compression method available.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', delta.data[1]),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
578
578
579
579
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Tell whether ``deltainfo`` is an acceptable delta for ``revinfo``.

    "Good" means the delta stays within the disk-span, size and
    chain-length bounds that we know keep read performance reasonable.
    Returns False for a None ``deltainfo``.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' bounds the amount of I/O needed (distance
    #   from the base revision on disk).
    # - 'deltainfo.compresseddeltalen' bounds the CPU spent (total size
    #   of the deltas that must be applied).
    textlen = revinfo.textlen

    # Compute the maximum acceptable read span. When no explicit limit is
    # configured, fall back to the candidate's own distance so the span
    # check below always passes.
    max_span = revlog._maxdeltachainspan
    if not max_span:
        max_span = deltainfo.distance
    max_span = max(max_span, textlen * 4)

    # Bad delta from read span:
    #
    # The span of data read would exceed the maximum allowed. Sparse
    # revlogs rely on "sparse reading" instead, so the span check is
    # skipped entirely there: pathological layouts are possible in theory
    # but do not occur in practice.
    if not revlog._sparserevlog and max_span < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target full text is inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The chain's total delta size exceeds K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # The limit exists to prevent endless chains of intermediate deltas
    # from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot would be larger than the
    # previous snapshot it builds upon.
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
651
651
652
652
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
# (Used in _candidategroups: a base whose rawsize is more than 500x smaller
# than the incoming text is skipped.)
LIMIT_BASE2TEXT = 500
657
657
658
658
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides groups of revisions to be tested as delta base.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.

    This is a generator-based coroutine: each yielded tuple is a group of
    candidate base revisions, and the caller sends back (via ``send()``)
    the revision it selected from that group, or None. A final None is
    yielded when no candidates remain.

    :revlog:         the revlog we are computing a delta for
    :textlen:        length of the full text being stored
    :p1, p2:         revision numbers of the target revision's parents
    :cachedelta:     optional pre-computed delta hint; only consulted here
                     indirectly through _refinedgroups
    :excluded_bases: optional container of revisions that must never be
                     used as a base (e.g. censored revisions)
    :target_rev:     revision number the delta is being (re)computed for;
                     candidates >= target_rev are skipped
    :snapshot_cache: optional SnapshotCache shared across calls, so a
                     group application reuses a single snapshot scan
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    # bind frequently-used revlog methods to locals
    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # nullrev is handled as a last resort, never as a group member
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as
            # a last resort.
            if rev == nullrev:
                continue
            # filter out revisions we already tested
            if rev in tested:
                continue
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high
            # in the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta bases that can never produce a good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            # a base vastly smaller than the text is unlikely to delta well
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access
            # it here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if the chain would become too long, skip this base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if the chain already holds too much data, skip this base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If the text is significantly larger than the base, we
                    # can expect the resulting delta to be proportional to
                    # the size difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper
                        # bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target
                        # base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in
            # a quite significant performance impact. To reduce this, we
            # can send them in smaller batches until the new batch does not
            # provide any improvements.
            #
            # This might reduce the overall efficiency of the compression
            # in some corner cases, but that should also prevent very
            # pathological cases from being an issue. (eg. 20 000
            # candidates).
            #
            # XXX note that the ordering of the group becomes important as
            # it now impacts the final result. The current order is
            # unprocessed and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop chunking once a batch brought no improvement
                    if prev_good == good:
                        break

    yield None
800
802
801
803
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Coroutine yielding refined groups of candidate delta bases.

    Drives _rawgroups and then, for sparse revlogs, refines the caller's
    pick: the caller sends back the selected revision ("good") through
    ``send()`` after each yielded group. A final None terminates the
    stream.

    ``cachedelta`` is a hint received with the incoming data; when its
    reuse policy allows it, the cached base is offered first and, if
    accepted, no further candidates are produced.

    ``snapshot_cache`` lets callers share a single SnapshotCache across
    several revisions (it is created lazily when not provided).
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse of the source delta
    # is useful when we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice;
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # create a local cache only when the caller did not provide a shared one
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot chain
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshots built on top of "good"
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
865
867
866
868
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first:
    parents first, then (for sparse revlogs) snapshot siblings and the
    bases from the parents' snapshot chains, then full snapshots, and
    finally — for non-sparse revlogs only — the previous revision.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude the null revision; an already-lazy-tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: 'parents' is only bound when gdelta is true, but 'sparse'
    # implies gdelta, so this condition is safe.
    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to
        # use as a base for a new intermediate-snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are
        # more likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we
            # check if an existing snapshot, child of a base we consider,
            # would be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer
                # chances to be valid bases since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain; search if any full
        # snapshots emitted since the parents' base would be a suitable
        # base for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains.
        # Creating more and more snapshots as the repository grows.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approaches failed: try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
987
989
988
990
class SnapshotCache:
    """Cache of the snapshot revisions built on top of each base.

    ``snapshots`` maps base-rev -> set of snapshot revisions whose delta
    parent is that base. The cache also tracks the contiguous
    [_start_rev, _end_rev] range it has already scanned, so repeated
    updates only look at revisions not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base-rev -> {snapshot revisions using base-rev as delta parent}
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the revision range already scanned
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """Scan ``revlog`` for snapshots between ``start_rev`` and tip."""
        end_rev = len(revlog) - 1
        if start_rev > end_rev:
            return  # nothing to scan, the requested range is empty

        if self._start_rev is None:
            # first scan: cover the whole requested range at once
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # extend the known range on whichever side(s) are missing
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        # record the (possibly grown) scanned range
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """Record snapshots in [start_rev, end_rev] (must not overlap the
        already-scanned range)."""
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        found = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: let the native index perform the scan
            revlog.index.findsnapshots(found, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    found[deltaparent(rev)].add(rev)
1042
1044
1043
1045
class deltacomputer:
    """Object in charge of finding and building deltas for new revisions."""

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        self.revlog = revlog
        # optional callable receiving debug messages (bytes)
        self._write_debug = write_debug
        # emit detailed search traces (only honored when write_debug is set)
        self._debug_search = debug_search
        # optional list collecting one debug dict per processed revision
        self._debug_info = debug_info
        # a single snapshot cache reused across finddeltainfo calls, so
        # applying a whole group of revisions shares the snapshot scan
        self._snapshot_cache = SnapshotCache()
1057 def buildtext(self, revinfo, fh):
1060 def buildtext(self, revinfo, fh):
1058 """Builds a fulltext version of a revision
1061 """Builds a fulltext version of a revision
1059
1062
1060 revinfo: revisioninfo instance that contains all needed info
1063 revinfo: revisioninfo instance that contains all needed info
1061 fh: file handle to either the .i or the .d revlog file,
1064 fh: file handle to either the .i or the .d revlog file,
1062 depending on whether it is inlined or not
1065 depending on whether it is inlined or not
1063 """
1066 """
1064 btext = revinfo.btext
1067 btext = revinfo.btext
1065 if btext[0] is not None:
1068 if btext[0] is not None:
1066 return btext[0]
1069 return btext[0]
1067
1070
1068 revlog = self.revlog
1071 revlog = self.revlog
1069 cachedelta = revinfo.cachedelta
1072 cachedelta = revinfo.cachedelta
1070 baserev = cachedelta[0]
1073 baserev = cachedelta[0]
1071 delta = cachedelta[1]
1074 delta = cachedelta[1]
1072
1075
1073 fulltext = btext[0] = _textfromdelta(
1076 fulltext = btext[0] = _textfromdelta(
1074 fh,
1077 fh,
1075 revlog,
1078 revlog,
1076 baserev,
1079 baserev,
1077 delta,
1080 delta,
1078 revinfo.p1,
1081 revinfo.p1,
1079 revinfo.p2,
1082 revinfo.p2,
1080 revinfo.flags,
1083 revinfo.flags,
1081 revinfo.node,
1084 revinfo.node,
1082 )
1085 )
1083 return fulltext
1086 return fulltext
1084
1087
1085 def _builddeltadiff(self, base, revinfo, fh):
1088 def _builddeltadiff(self, base, revinfo, fh):
1086 revlog = self.revlog
1089 revlog = self.revlog
1087 t = self.buildtext(revinfo, fh)
1090 t = self.buildtext(revinfo, fh)
1088 if revlog.iscensored(base):
1091 if revlog.iscensored(base):
1089 # deltas based on a censored revision must replace the
1092 # deltas based on a censored revision must replace the
1090 # full content in one patch, so delta works everywhere
1093 # full content in one patch, so delta works everywhere
1091 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1094 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1092 delta = header + t
1095 delta = header + t
1093 else:
1096 else:
1094 ptext = revlog.rawdata(base, _df=fh)
1097 ptext = revlog.rawdata(base, _df=fh)
1095 delta = mdiff.textdiff(ptext, t)
1098 delta = mdiff.textdiff(ptext, t)
1096
1099
1097 return delta
1100 return delta
1098
1101
1099 def _builddeltainfo(self, revinfo, base, fh):
1102 def _builddeltainfo(self, revinfo, base, fh):
1100 # can we use the cached delta?
1103 # can we use the cached delta?
1101 revlog = self.revlog
1104 revlog = self.revlog
1102 debug_search = self._write_debug is not None and self._debug_search
1105 debug_search = self._write_debug is not None and self._debug_search
1103 chainbase = revlog.chainbase(base)
1106 chainbase = revlog.chainbase(base)
1104 if revlog._generaldelta:
1107 if revlog._generaldelta:
1105 deltabase = base
1108 deltabase = base
1106 else:
1109 else:
1107 deltabase = chainbase
1110 deltabase = chainbase
1108 snapshotdepth = None
1111 snapshotdepth = None
1109 if revlog._sparserevlog and deltabase == nullrev:
1112 if revlog._sparserevlog and deltabase == nullrev:
1110 snapshotdepth = 0
1113 snapshotdepth = 0
1111 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1114 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1112 # A delta chain should always be one full snapshot,
1115 # A delta chain should always be one full snapshot,
1113 # zero or more semi-snapshots, and zero or more deltas
1116 # zero or more semi-snapshots, and zero or more deltas
1114 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1117 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1115 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1118 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1116 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1119 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1117 delta = None
1120 delta = None
1118 if revinfo.cachedelta:
1121 if revinfo.cachedelta:
1119 cachebase = revinfo.cachedelta[0]
1122 cachebase = revinfo.cachedelta[0]
1120 # check if the diff still apply
1123 # check if the diff still apply
1121 currentbase = cachebase
1124 currentbase = cachebase
1122 while (
1125 while (
1123 currentbase != nullrev
1126 currentbase != nullrev
1124 and currentbase != base
1127 and currentbase != base
1125 and self.revlog.length(currentbase) == 0
1128 and self.revlog.length(currentbase) == 0
1126 ):
1129 ):
1127 currentbase = self.revlog.deltaparent(currentbase)
1130 currentbase = self.revlog.deltaparent(currentbase)
1128 if self.revlog._lazydelta and currentbase == base:
1131 if self.revlog._lazydelta and currentbase == base:
1129 delta = revinfo.cachedelta[1]
1132 delta = revinfo.cachedelta[1]
1130 if delta is None:
1133 if delta is None:
1131 delta = self._builddeltadiff(base, revinfo, fh)
1134 delta = self._builddeltadiff(base, revinfo, fh)
1132 if debug_search:
1135 if debug_search:
1133 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1136 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1134 msg %= len(delta)
1137 msg %= len(delta)
1135 self._write_debug(msg)
1138 self._write_debug(msg)
1136 # snapshotdept need to be neither None nor 0 level snapshot
1139 # snapshotdept need to be neither None nor 0 level snapshot
1137 if revlog.upperboundcomp is not None and snapshotdepth:
1140 if revlog.upperboundcomp is not None and snapshotdepth:
1138 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1141 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1139 snapshotlimit = revinfo.textlen >> snapshotdepth
1142 snapshotlimit = revinfo.textlen >> snapshotdepth
1140 if debug_search:
1143 if debug_search:
1141 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1144 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1142 msg %= lowestrealisticdeltalen
1145 msg %= lowestrealisticdeltalen
1143 self._write_debug(msg)
1146 self._write_debug(msg)
1144 if snapshotlimit < lowestrealisticdeltalen:
1147 if snapshotlimit < lowestrealisticdeltalen:
1145 if debug_search:
1148 if debug_search:
1146 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1149 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1147 self._write_debug(msg)
1150 self._write_debug(msg)
1148 return None
1151 return None
1149 if revlog.length(base) < lowestrealisticdeltalen:
1152 if revlog.length(base) < lowestrealisticdeltalen:
1150 if debug_search:
1153 if debug_search:
1151 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1154 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1152 self._write_debug(msg)
1155 self._write_debug(msg)
1153 return None
1156 return None
1154 header, data = revlog.compress(delta)
1157 header, data = revlog.compress(delta)
1155 deltalen = len(header) + len(data)
1158 deltalen = len(header) + len(data)
1156 offset = revlog.end(len(revlog) - 1)
1159 offset = revlog.end(len(revlog) - 1)
1157 dist = deltalen + offset - revlog.start(chainbase)
1160 dist = deltalen + offset - revlog.start(chainbase)
1158 chainlen, compresseddeltalen = revlog._chaininfo(base)
1161 chainlen, compresseddeltalen = revlog._chaininfo(base)
1159 chainlen += 1
1162 chainlen += 1
1160 compresseddeltalen += deltalen
1163 compresseddeltalen += deltalen
1161
1164
1162 return _deltainfo(
1165 return _deltainfo(
1163 dist,
1166 dist,
1164 deltalen,
1167 deltalen,
1165 (header, data),
1168 (header, data),
1166 deltabase,
1169 deltabase,
1167 chainbase,
1170 chainbase,
1168 chainlen,
1171 chainlen,
1169 compresseddeltalen,
1172 compresseddeltalen,
1170 snapshotdepth,
1173 snapshotdepth,
1171 )
1174 )
1172
1175
1173 def _fullsnapshotinfo(self, fh, revinfo, curr):
1176 def _fullsnapshotinfo(self, fh, revinfo, curr):
1174 rawtext = self.buildtext(revinfo, fh)
1177 rawtext = self.buildtext(revinfo, fh)
1175 data = self.revlog.compress(rawtext)
1178 data = self.revlog.compress(rawtext)
1176 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1179 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1177 deltabase = chainbase = curr
1180 deltabase = chainbase = curr
1178 snapshotdepth = 0
1181 snapshotdepth = 0
1179 chainlen = 1
1182 chainlen = 1
1180
1183
1181 return _deltainfo(
1184 return _deltainfo(
1182 dist,
1185 dist,
1183 deltalen,
1186 deltalen,
1184 data,
1187 data,
1185 deltabase,
1188 deltabase,
1186 chainbase,
1189 chainbase,
1187 chainlen,
1190 chainlen,
1188 compresseddeltalen,
1191 compresseddeltalen,
1189 snapshotdepth,
1192 snapshotdepth,
1190 )
1193 )
1191
1194
1192 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1195 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1193 """Find an acceptable delta against a candidate revision
1196 """Find an acceptable delta against a candidate revision
1194
1197
1195 revinfo: information about the revision (instance of _revisioninfo)
1198 revinfo: information about the revision (instance of _revisioninfo)
1196 fh: file handle to either the .i or the .d revlog file,
1199 fh: file handle to either the .i or the .d revlog file,
1197 depending on whether it is inlined or not
1200 depending on whether it is inlined or not
1198
1201
1199 Returns the first acceptable candidate revision, as ordered by
1202 Returns the first acceptable candidate revision, as ordered by
1200 _candidategroups
1203 _candidategroups
1201
1204
1202 If no suitable deltabase is found, we return delta info for a full
1205 If no suitable deltabase is found, we return delta info for a full
1203 snapshot.
1206 snapshot.
1204
1207
1205 `excluded_bases` is an optional set of revision that cannot be used as
1208 `excluded_bases` is an optional set of revision that cannot be used as
1206 a delta base. Use this to recompute delta suitable in censor or strip
1209 a delta base. Use this to recompute delta suitable in censor or strip
1207 context.
1210 context.
1208 """
1211 """
1209 if target_rev is None:
1212 if target_rev is None:
1210 target_rev = len(self.revlog)
1213 target_rev = len(self.revlog)
1211
1214
1212 if not revinfo.textlen:
1215 if not revinfo.textlen:
1213 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1216 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1214
1217
1215 if excluded_bases is None:
1218 if excluded_bases is None:
1216 excluded_bases = set()
1219 excluded_bases = set()
1217
1220
1218 # no delta for flag processor revision (see "candelta" for why)
1221 # no delta for flag processor revision (see "candelta" for why)
1219 # not calling candelta since only one revision needs test, also to
1222 # not calling candelta since only one revision needs test, also to
1220 # avoid overhead fetching flags again.
1223 # avoid overhead fetching flags again.
1221 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1224 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1222 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1225 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1223
1226
1224 gather_debug = (
1227 gather_debug = (
1225 self._write_debug is not None or self._debug_info is not None
1228 self._write_debug is not None or self._debug_info is not None
1226 )
1229 )
1227 debug_search = self._write_debug is not None and self._debug_search
1230 debug_search = self._write_debug is not None and self._debug_search
1228
1231
1229 if gather_debug:
1232 if gather_debug:
1230 start = util.timer()
1233 start = util.timer()
1231
1234
1232 # count the number of different delta we tried (for debug purpose)
1235 # count the number of different delta we tried (for debug purpose)
1233 dbg_try_count = 0
1236 dbg_try_count = 0
1234 # count the number of "search round" we did. (for debug purpose)
1237 # count the number of "search round" we did. (for debug purpose)
1235 dbg_try_rounds = 0
1238 dbg_try_rounds = 0
1236 dbg_type = b'unknown'
1239 dbg_type = b'unknown'
1237
1240
1238 cachedelta = revinfo.cachedelta
1241 cachedelta = revinfo.cachedelta
1239 p1 = revinfo.p1
1242 p1 = revinfo.p1
1240 p2 = revinfo.p2
1243 p2 = revinfo.p2
1241 revlog = self.revlog
1244 revlog = self.revlog
1242
1245
1243 deltainfo = None
1246 deltainfo = None
1244 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1247 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1245
1248
1246 if gather_debug:
1249 if gather_debug:
1247 if p1r != nullrev:
1250 if p1r != nullrev:
1248 p1_chain_len = revlog._chaininfo(p1r)[0]
1251 p1_chain_len = revlog._chaininfo(p1r)[0]
1249 else:
1252 else:
1250 p1_chain_len = -1
1253 p1_chain_len = -1
1251 if p2r != nullrev:
1254 if p2r != nullrev:
1252 p2_chain_len = revlog._chaininfo(p2r)[0]
1255 p2_chain_len = revlog._chaininfo(p2r)[0]
1253 else:
1256 else:
1254 p2_chain_len = -1
1257 p2_chain_len = -1
1255 if debug_search:
1258 if debug_search:
1256 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1259 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1257 msg %= target_rev
1260 msg %= target_rev
1258 self._write_debug(msg)
1261 self._write_debug(msg)
1259
1262
1260 groups = _candidategroups(
1263 groups = _candidategroups(
1261 self.revlog,
1264 self.revlog,
1262 revinfo.textlen,
1265 revinfo.textlen,
1263 p1r,
1266 p1r,
1264 p2r,
1267 p2r,
1265 cachedelta,
1268 cachedelta,
1266 excluded_bases,
1269 excluded_bases,
1267 target_rev,
1270 target_rev,
1271 snapshot_cache=self._snapshot_cache,
1268 )
1272 )
1269 candidaterevs = next(groups)
1273 candidaterevs = next(groups)
1270 while candidaterevs is not None:
1274 while candidaterevs is not None:
1271 dbg_try_rounds += 1
1275 dbg_try_rounds += 1
1272 if debug_search:
1276 if debug_search:
1273 prev = None
1277 prev = None
1274 if deltainfo is not None:
1278 if deltainfo is not None:
1275 prev = deltainfo.base
1279 prev = deltainfo.base
1276
1280
1277 if (
1281 if (
1278 cachedelta is not None
1282 cachedelta is not None
1279 and len(candidaterevs) == 1
1283 and len(candidaterevs) == 1
1280 and cachedelta[0] in candidaterevs
1284 and cachedelta[0] in candidaterevs
1281 ):
1285 ):
1282 round_type = b"cached-delta"
1286 round_type = b"cached-delta"
1283 elif p1 in candidaterevs or p2 in candidaterevs:
1287 elif p1 in candidaterevs or p2 in candidaterevs:
1284 round_type = b"parents"
1288 round_type = b"parents"
1285 elif prev is not None and all(c < prev for c in candidaterevs):
1289 elif prev is not None and all(c < prev for c in candidaterevs):
1286 round_type = b"refine-down"
1290 round_type = b"refine-down"
1287 elif prev is not None and all(c > prev for c in candidaterevs):
1291 elif prev is not None and all(c > prev for c in candidaterevs):
1288 round_type = b"refine-up"
1292 round_type = b"refine-up"
1289 else:
1293 else:
1290 round_type = b"search-down"
1294 round_type = b"search-down"
1291 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1295 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1292 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1296 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1293 self._write_debug(msg)
1297 self._write_debug(msg)
1294 nominateddeltas = []
1298 nominateddeltas = []
1295 if deltainfo is not None:
1299 if deltainfo is not None:
1296 if debug_search:
1300 if debug_search:
1297 msg = (
1301 msg = (
1298 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1302 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1299 )
1303 )
1300 msg %= (deltainfo.base, deltainfo.deltalen)
1304 msg %= (deltainfo.base, deltainfo.deltalen)
1301 self._write_debug(msg)
1305 self._write_debug(msg)
1302 # if we already found a good delta,
1306 # if we already found a good delta,
1303 # challenge it against refined candidates
1307 # challenge it against refined candidates
1304 nominateddeltas.append(deltainfo)
1308 nominateddeltas.append(deltainfo)
1305 for candidaterev in candidaterevs:
1309 for candidaterev in candidaterevs:
1306 if debug_search:
1310 if debug_search:
1307 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1311 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1308 msg %= candidaterev
1312 msg %= candidaterev
1309 self._write_debug(msg)
1313 self._write_debug(msg)
1310 candidate_type = None
1314 candidate_type = None
1311 if candidaterev == p1:
1315 if candidaterev == p1:
1312 candidate_type = b"p1"
1316 candidate_type = b"p1"
1313 elif candidaterev == p2:
1317 elif candidaterev == p2:
1314 candidate_type = b"p2"
1318 candidate_type = b"p2"
1315 elif self.revlog.issnapshot(candidaterev):
1319 elif self.revlog.issnapshot(candidaterev):
1316 candidate_type = b"snapshot-%d"
1320 candidate_type = b"snapshot-%d"
1317 candidate_type %= self.revlog.snapshotdepth(
1321 candidate_type %= self.revlog.snapshotdepth(
1318 candidaterev
1322 candidaterev
1319 )
1323 )
1320
1324
1321 if candidate_type is not None:
1325 if candidate_type is not None:
1322 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1326 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1323 msg %= candidate_type
1327 msg %= candidate_type
1324 self._write_debug(msg)
1328 self._write_debug(msg)
1325 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1329 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1326 msg %= self.revlog.length(candidaterev)
1330 msg %= self.revlog.length(candidaterev)
1327 self._write_debug(msg)
1331 self._write_debug(msg)
1328 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1332 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1329 msg %= self.revlog.deltaparent(candidaterev)
1333 msg %= self.revlog.deltaparent(candidaterev)
1330 self._write_debug(msg)
1334 self._write_debug(msg)
1331
1335
1332 dbg_try_count += 1
1336 dbg_try_count += 1
1333
1337
1334 if debug_search:
1338 if debug_search:
1335 delta_start = util.timer()
1339 delta_start = util.timer()
1336 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1340 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1337 if debug_search:
1341 if debug_search:
1338 delta_end = util.timer()
1342 delta_end = util.timer()
1339 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1343 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1340 msg %= delta_end - delta_start
1344 msg %= delta_end - delta_start
1341 self._write_debug(msg)
1345 self._write_debug(msg)
1342 if candidatedelta is not None:
1346 if candidatedelta is not None:
1343 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1347 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1344 if debug_search:
1348 if debug_search:
1345 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1349 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1346 msg %= candidatedelta.deltalen
1350 msg %= candidatedelta.deltalen
1347 self._write_debug(msg)
1351 self._write_debug(msg)
1348 nominateddeltas.append(candidatedelta)
1352 nominateddeltas.append(candidatedelta)
1349 elif debug_search:
1353 elif debug_search:
1350 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1354 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1351 msg %= candidatedelta.deltalen
1355 msg %= candidatedelta.deltalen
1352 self._write_debug(msg)
1356 self._write_debug(msg)
1353 elif debug_search:
1357 elif debug_search:
1354 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1358 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1355 self._write_debug(msg)
1359 self._write_debug(msg)
1356 if nominateddeltas:
1360 if nominateddeltas:
1357 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1361 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1358 if deltainfo is not None:
1362 if deltainfo is not None:
1359 candidaterevs = groups.send(deltainfo.base)
1363 candidaterevs = groups.send(deltainfo.base)
1360 else:
1364 else:
1361 candidaterevs = next(groups)
1365 candidaterevs = next(groups)
1362
1366
1363 if deltainfo is None:
1367 if deltainfo is None:
1364 dbg_type = b"full"
1368 dbg_type = b"full"
1365 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1369 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1366 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1370 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1367 dbg_type = b"snapshot"
1371 dbg_type = b"snapshot"
1368 else:
1372 else:
1369 dbg_type = b"delta"
1373 dbg_type = b"delta"
1370
1374
1371 if gather_debug:
1375 if gather_debug:
1372 end = util.timer()
1376 end = util.timer()
1373 used_cached = (
1377 used_cached = (
1374 cachedelta is not None
1378 cachedelta is not None
1375 and dbg_try_rounds == 1
1379 and dbg_try_rounds == 1
1376 and dbg_try_count == 1
1380 and dbg_try_count == 1
1377 and deltainfo.base == cachedelta[0]
1381 and deltainfo.base == cachedelta[0]
1378 )
1382 )
1379 dbg = {
1383 dbg = {
1380 'duration': end - start,
1384 'duration': end - start,
1381 'revision': target_rev,
1385 'revision': target_rev,
1382 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1386 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1383 'search_round_count': dbg_try_rounds,
1387 'search_round_count': dbg_try_rounds,
1384 'using-cached-base': used_cached,
1388 'using-cached-base': used_cached,
1385 'delta_try_count': dbg_try_count,
1389 'delta_try_count': dbg_try_count,
1386 'type': dbg_type,
1390 'type': dbg_type,
1387 'p1-chain-len': p1_chain_len,
1391 'p1-chain-len': p1_chain_len,
1388 'p2-chain-len': p2_chain_len,
1392 'p2-chain-len': p2_chain_len,
1389 }
1393 }
1390 if (
1394 if (
1391 deltainfo.snapshotdepth # pytype: disable=attribute-error
1395 deltainfo.snapshotdepth # pytype: disable=attribute-error
1392 is not None
1396 is not None
1393 ):
1397 ):
1394 dbg[
1398 dbg[
1395 'snapshot-depth'
1399 'snapshot-depth'
1396 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1400 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1397 else:
1401 else:
1398 dbg['snapshot-depth'] = 0
1402 dbg['snapshot-depth'] = 0
1399 target_revlog = b"UNKNOWN"
1403 target_revlog = b"UNKNOWN"
1400 target_type = self.revlog.target[0]
1404 target_type = self.revlog.target[0]
1401 target_key = self.revlog.target[1]
1405 target_key = self.revlog.target[1]
1402 if target_type == KIND_CHANGELOG:
1406 if target_type == KIND_CHANGELOG:
1403 target_revlog = b'CHANGELOG:'
1407 target_revlog = b'CHANGELOG:'
1404 elif target_type == KIND_MANIFESTLOG:
1408 elif target_type == KIND_MANIFESTLOG:
1405 target_revlog = b'MANIFESTLOG:'
1409 target_revlog = b'MANIFESTLOG:'
1406 if target_key:
1410 if target_key:
1407 target_revlog += b'%s:' % target_key
1411 target_revlog += b'%s:' % target_key
1408 elif target_type == KIND_FILELOG:
1412 elif target_type == KIND_FILELOG:
1409 target_revlog = b'FILELOG:'
1413 target_revlog = b'FILELOG:'
1410 if target_key:
1414 if target_key:
1411 target_revlog += b'%s:' % target_key
1415 target_revlog += b'%s:' % target_key
1412 dbg['target-revlog'] = target_revlog
1416 dbg['target-revlog'] = target_revlog
1413
1417
1414 if self._debug_info is not None:
1418 if self._debug_info is not None:
1415 self._debug_info.append(dbg)
1419 self._debug_info.append(dbg)
1416
1420
1417 if self._write_debug is not None:
1421 if self._write_debug is not None:
1418 msg = (
1422 msg = (
1419 b"DBG-DELTAS:"
1423 b"DBG-DELTAS:"
1420 b" %-12s"
1424 b" %-12s"
1421 b" rev=%d:"
1425 b" rev=%d:"
1422 b" delta-base=%d"
1426 b" delta-base=%d"
1423 b" is-cached=%d"
1427 b" is-cached=%d"
1424 b" - search-rounds=%d"
1428 b" - search-rounds=%d"
1425 b" try-count=%d"
1429 b" try-count=%d"
1426 b" - delta-type=%-6s"
1430 b" - delta-type=%-6s"
1427 b" snap-depth=%d"
1431 b" snap-depth=%d"
1428 b" - p1-chain-length=%d"
1432 b" - p1-chain-length=%d"
1429 b" p2-chain-length=%d"
1433 b" p2-chain-length=%d"
1430 b" - duration=%f"
1434 b" - duration=%f"
1431 b"\n"
1435 b"\n"
1432 )
1436 )
1433 msg %= (
1437 msg %= (
1434 dbg["target-revlog"],
1438 dbg["target-revlog"],
1435 dbg["revision"],
1439 dbg["revision"],
1436 dbg["delta-base"],
1440 dbg["delta-base"],
1437 dbg["using-cached-base"],
1441 dbg["using-cached-base"],
1438 dbg["search_round_count"],
1442 dbg["search_round_count"],
1439 dbg["delta_try_count"],
1443 dbg["delta_try_count"],
1440 dbg["type"],
1444 dbg["type"],
1441 dbg["snapshot-depth"],
1445 dbg["snapshot-depth"],
1442 dbg["p1-chain-len"],
1446 dbg["p1-chain-len"],
1443 dbg["p2-chain-len"],
1447 dbg["p2-chain-len"],
1444 dbg["duration"],
1448 dbg["duration"],
1445 )
1449 )
1446 self._write_debug(msg)
1450 self._write_debug(msg)
1447 return deltainfo
1451 return deltainfo
1448
1452
1449
1453
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # not data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        # no explicit header: inspect the first byte of the payload
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now