##// END OF EJS Templates
deltas: add a `target_rev` parameter to finddeltainfo...
marmoute -
r48249:93f4e183 default
parent child Browse files
Show More
@@ -1,1143 +1,1147 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18 from ..pycompat import getattr
18 from ..pycompat import getattr
19
19
20 from .constants import (
20 from .constants import (
21 COMP_MODE_DEFAULT,
21 COMP_MODE_DEFAULT,
22 COMP_MODE_INLINE,
22 COMP_MODE_INLINE,
23 COMP_MODE_PLAIN,
23 COMP_MODE_PLAIN,
24 REVIDX_ISCENSORED,
24 REVIDX_ISCENSORED,
25 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 REVIDX_RAWTEXT_CHANGING_FLAGS,
26 )
26 )
27
27
28 from ..thirdparty import attr
28 from ..thirdparty import attr
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 util,
33 util,
34 )
34 )
35
35
36 from . import flagutil
36 from . import flagutil
37
37
38 # maximum <delta-chain-data>/<revision-text-length> ratio
38 # maximum <delta-chain-data>/<revision-text-length> ratio
39 LIMIT_DELTA2TEXT = 2
39 LIMIT_DELTA2TEXT = 2
40
40
41
41
42 class _testrevlog(object):
42 class _testrevlog(object):
43 """minimalist fake revlog to use in doctests"""
43 """minimalist fake revlog to use in doctests"""
44
44
45 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
45 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
46 """data is an list of revision payload boundaries"""
46 """data is an list of revision payload boundaries"""
47 self._data = data
47 self._data = data
48 self._srdensitythreshold = density
48 self._srdensitythreshold = density
49 self._srmingapsize = mingap
49 self._srmingapsize = mingap
50 self._snapshot = set(snapshot)
50 self._snapshot = set(snapshot)
51 self.index = None
51 self.index = None
52
52
53 def start(self, rev):
53 def start(self, rev):
54 if rev == nullrev:
54 if rev == nullrev:
55 return 0
55 return 0
56 if rev == 0:
56 if rev == 0:
57 return 0
57 return 0
58 return self._data[rev - 1]
58 return self._data[rev - 1]
59
59
60 def end(self, rev):
60 def end(self, rev):
61 if rev == nullrev:
61 if rev == nullrev:
62 return 0
62 return 0
63 return self._data[rev]
63 return self._data[rev]
64
64
65 def length(self, rev):
65 def length(self, rev):
66 return self.end(rev) - self.start(rev)
66 return self.end(rev) - self.start(rev)
67
67
68 def __len__(self):
68 def __len__(self):
69 return len(self._data)
69 return len(self._data)
70
70
71 def issnapshot(self, rev):
71 def issnapshot(self, rev):
72 if rev == nullrev:
72 if rev == nullrev:
73 return True
73 return True
74 return rev in self._snapshot
74 return rev in self._snapshot
75
75
76
76
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the index's native density slicing implementation when it exists,
    # otherwise fall back to the pure Python one
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        # density slicing may still produce chunks larger than the read
        # guarantee; cut those down to `targetsize`
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
149
149
150
150
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within the limit: nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within the limit; extend the current chunk
            endrevidx = idx + 1
        else:
            # current chunk is full (or we hit a non-snapshot); emit it and
            # start a new chunk at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate chunk until it fits within targetsize
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
304
304
305
305
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision cannot be sliced any further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole span is smaller than the minimal gap size: not worth slicing
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Gather all large-enough gaps as (size, index-right-after-gap) pairs
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read,
        # which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
430
430
431
431
432 def _trimchunk(revlog, revs, startidx, endidx=None):
432 def _trimchunk(revlog, revs, startidx, endidx=None):
433 """returns revs[startidx:endidx] without empty trailing revs
433 """returns revs[startidx:endidx] without empty trailing revs
434
434
435 Doctest Setup
435 Doctest Setup
436 >>> revlog = _testrevlog([
436 >>> revlog = _testrevlog([
437 ... 5, #0
437 ... 5, #0
438 ... 10, #1
438 ... 10, #1
439 ... 12, #2
439 ... 12, #2
440 ... 12, #3 (empty)
440 ... 12, #3 (empty)
441 ... 17, #4
441 ... 17, #4
442 ... 21, #5
442 ... 21, #5
443 ... 21, #6 (empty)
443 ... 21, #6 (empty)
444 ... ])
444 ... ])
445
445
446 Contiguous cases:
446 Contiguous cases:
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
448 [0, 1, 2, 3, 4, 5]
448 [0, 1, 2, 3, 4, 5]
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
450 [0, 1, 2, 3, 4]
450 [0, 1, 2, 3, 4]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
452 [0, 1, 2]
452 [0, 1, 2]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
454 [2]
454 [2]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
456 [3, 4, 5]
456 [3, 4, 5]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
458 [3, 4]
458 [3, 4]
459
459
460 Discontiguous cases:
460 Discontiguous cases:
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
462 [1, 3, 5]
462 [1, 3, 5]
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
464 [1]
464 [1]
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
466 [3, 5]
466 [3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
468 [3, 5]
468 [3, 5]
469 """
469 """
470 length = revlog.length
470 length = revlog.length
471
471
472 if endidx is None:
472 if endidx is None:
473 endidx = len(revs)
473 endidx = len(revs)
474
474
475 # If we have a non-emtpy delta candidate, there are nothing to trim
475 # If we have a non-emtpy delta candidate, there are nothing to trim
476 if revs[endidx - 1] < len(revlog):
476 if revs[endidx - 1] < len(revlog):
477 # Trim empty revs at the end, except the very first revision of a chain
477 # Trim empty revs at the end, except the very first revision of a chain
478 while (
478 while (
479 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
479 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
480 ):
480 ):
481 endidx -= 1
481 endidx -= 1
482
482
483 return revs[startidx:endidx]
483 return revs[startidx:endidx]
484
484
485
485
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    first, last = revs[0], revs[-1]
    # span is measured from the start of the first revision to the end of
    # the last one, gaps included
    return revlog.end(last) - revlog.start(first)
514
514
515
515
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the file handle passed down to ``revlog.revision`` when the
    base text must be read; ``baserev`` is the revision the delta applies
    to; ``p1``/``p2`` are the parent nodes used for hash checking and
    ``expectednode`` is the node the rebuilt text must hash to (when the
    flag processors request validation).

    Raises ``error.StorageError`` when the rebuilt text turns out not to be
    censored while ``flags`` carries ``REVIDX_ISCENSORED``, and re-raises
    ``error.CensoredNodeError`` unless that censored flag was passed.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # the delta is a full-replacement diff: the new text follows the
        # fixed-size header directly
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
545
545
546
546
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable description of one candidate delta (built by
    deltacomputer._builddeltainfo)."""

    # byte span from the start of the chain base to the end of this delta
    distance = attr.ib()
    # size in bytes of the stored (compressed) delta
    deltalen = attr.ib()
    # (header, compressed-payload) pair as returned by revlog.compress
    data = attr.ib()
    # revision number this delta applies on top of
    base = attr.ib()
    # first revision of the resulting delta chain
    chainbase = attr.ib()
    # length of the delta chain, this delta included
    chainlen = attr.ib()
    # cumulated compressed size of the whole chain, this delta included
    compresseddeltalen = attr.ib()
    # snapshot depth when this delta is a (sparse-revlog) snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
557
557
558
558
def drop_u_compression(delta):
    """Return *delta* with its b'u' (no-compression) marker header dropped.

    This is useful for revlog formats that have a better compression
    method: the payload can then be stored without the one-byte header.
    """
    header, payload = delta.data
    assert header == b'u', header
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
575
575
576
576
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Return True when ``deltainfo`` is an acceptable delta for ``revinfo``.

    "Acceptable" means the delta stays within the disk-span, disk-size and
    chain-length bounds that we know to be performant.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    textlen = revinfo.textlen

    # Bad delta from read span:
    #
    # the span of data read would be larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse
    # reading" to avoid issues related to the span of data.  In theory a
    # pathological revlog could be built where the delta pattern leads to
    # too many reads, but this does not happen in practice, so the span
    # check is skipped entirely there.
    span_cap = revlog._maxdeltachainspan or deltainfo.distance
    span_cap = max(span_cap, textlen * 4)
    if not revlog._sparserevlog and span_cap < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # storing a delta larger than the target text would be inefficient.
    if deltainfo.deltalen > textlen:
        return False

    # Bad delta from cumulated payload size:
    #
    # the sum of the deltas grew past K * target text length.
    if deltainfo.compresseddeltalen > textlen * LIMIT_DELTA2TEXT:
        return False

    # Bad delta from chain length:
    #
    # the number of deltas in the chain would get too high.
    if revlog._maxchainlen and deltainfo.chainlen > revlog._maxchainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # the limit exists to prevent endless chains of intermediate deltas
    # from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and deltainfo.deltalen > (textlen >> depth):
        return False

    # Bad delta when the new intermediate snapshot is larger than the
    # previous snapshot it is based on.
    if depth and deltainfo.deltalen > revlog.length(deltainfo.base):
        return False

    return True
648
648
649
649
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
# (Used as a divisor: bases with rawsize below textlen // LIMIT_BASE2TEXT
# are filtered out in _candidategroups.)
LIMIT_BASE2TEXT = 500
654
654
655
655
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.

    Coroutine-style generator: the caller drives it with ``send(good)``,
    where ``good`` is the revision from the previously yielded group that
    produced an acceptable delta (or None). A final None is yielded when
    no candidate is left.
    """
    # should we try to build a delta at all?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # revisions already considered (nullrev is handled as a last resort)
    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as
            # a last resort.
            if rev == nullrev:
                continue
            # filter out revisions we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta bases that can never produce a good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access
            # it here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if the chain would become too long, skip this base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if the chain already has too much data, skip this base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If the text is significantly larger than the base, we
                    # can expect the resulting delta to be proportional to
                    # the size difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper
                        # bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target
                        # base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performance. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
747
747
748
748
def _findsnapshots(revlog, cache, start_rev):
    """Collect all snapshots from start_rev to tip into *cache*.

    *cache* maps a delta-parent revision to the list of snapshot revisions
    based on it.
    """
    index = revlog.index
    if util.safehasattr(index, b'findsnapshots'):
        # fast path: the index implementation performs the scan natively
        index.findsnapshots(cache, start_rev)
        return
    is_snapshot = revlog.issnapshot
    parent_of = revlog.deltaparent
    for rev in revlog.revs(start_rev):
        if is_snapshot(rev):
            cache[parent_of(rev)].append(rev)
759
759
760
760
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate-base groups, refining around any accepted base.

    Coroutine-style generator: each ``yield`` emits a tuple of candidate
    base revisions and receives, via ``send``, the revision the caller
    found acceptable (or None). In the sparse-revlog case an accepted
    snapshot base is further refined by walking its delta-parent chain
    down and its snapshot children up. Yields None when exhausted.
    """
    good = None
    # First we try to reuse the delta contained in the bundle
    # (or from the source revlog).
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse of the source delta
    # is useful when we want to make sure we recompute "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice;
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: try the delta parents of the accepted base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshot children of the accepted base
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
808
808
809
809
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.

    When given, *snapshots* (a defaultdict of base-rev -> snapshot revs)
    is populated as a side effect for reuse by the caller.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazily tested bases if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to
        # use as a base for a new intermediate-snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level; high level ones are
        # more likely to result in small deltas
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we
            # check if an existing snapshot, child of a base we consider,
            # would be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer
                # chances to be valid bases since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain; search if any full
        # snapshot emitted since the parents' base would be a suitable base
        # for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains,
        # creating more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed; try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
929
929
930
930
931 class deltacomputer(object):
931 class deltacomputer(object):
    def __init__(self, revlog):
        # the revlog this computer builds deltas for
        self.revlog = revlog
934
934
935 def buildtext(self, revinfo, fh):
935 def buildtext(self, revinfo, fh):
936 """Builds a fulltext version of a revision
936 """Builds a fulltext version of a revision
937
937
938 revinfo: revisioninfo instance that contains all needed info
938 revinfo: revisioninfo instance that contains all needed info
939 fh: file handle to either the .i or the .d revlog file,
939 fh: file handle to either the .i or the .d revlog file,
940 depending on whether it is inlined or not
940 depending on whether it is inlined or not
941 """
941 """
942 btext = revinfo.btext
942 btext = revinfo.btext
943 if btext[0] is not None:
943 if btext[0] is not None:
944 return btext[0]
944 return btext[0]
945
945
946 revlog = self.revlog
946 revlog = self.revlog
947 cachedelta = revinfo.cachedelta
947 cachedelta = revinfo.cachedelta
948 baserev = cachedelta[0]
948 baserev = cachedelta[0]
949 delta = cachedelta[1]
949 delta = cachedelta[1]
950
950
951 fulltext = btext[0] = _textfromdelta(
951 fulltext = btext[0] = _textfromdelta(
952 fh,
952 fh,
953 revlog,
953 revlog,
954 baserev,
954 baserev,
955 delta,
955 delta,
956 revinfo.p1,
956 revinfo.p1,
957 revinfo.p2,
957 revinfo.p2,
958 revinfo.flags,
958 revinfo.flags,
959 revinfo.node,
959 revinfo.node,
960 )
960 )
961 return fulltext
961 return fulltext
962
962
963 def _builddeltadiff(self, base, revinfo, fh):
963 def _builddeltadiff(self, base, revinfo, fh):
964 revlog = self.revlog
964 revlog = self.revlog
965 t = self.buildtext(revinfo, fh)
965 t = self.buildtext(revinfo, fh)
966 if revlog.iscensored(base):
966 if revlog.iscensored(base):
967 # deltas based on a censored revision must replace the
967 # deltas based on a censored revision must replace the
968 # full content in one patch, so delta works everywhere
968 # full content in one patch, so delta works everywhere
969 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
969 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
970 delta = header + t
970 delta = header + t
971 else:
971 else:
972 ptext = revlog.rawdata(base, _df=fh)
972 ptext = revlog.rawdata(base, _df=fh)
973 delta = mdiff.textdiff(ptext, t)
973 delta = mdiff.textdiff(ptext, t)
974
974
975 return delta
975 return delta
976
976
    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing *revinfo* stored as a delta
        against revision *base*.

        Returns None when an upper-bound compression estimate shows the
        delta cannot meet the intermediate-snapshot size limits.
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, the base is always the chain base
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the cached diff still applies: walk up through
            # empty deltas until we reach a stored revision
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshotdepth needs to be neither None nor a 0-level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            # check the absolute constraint on the delta size
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            # check the relative constraint on the delta size
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        # span from the start of the chain base to the end of this delta
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1035
1035
1036 def _fullsnapshotinfo(self, fh, revinfo):
1036 def _fullsnapshotinfo(self, fh, revinfo, curr):
1037 curr = len(self.revlog)
1038 rawtext = self.buildtext(revinfo, fh)
1037 rawtext = self.buildtext(revinfo, fh)
1039 data = self.revlog.compress(rawtext)
1038 data = self.revlog.compress(rawtext)
1040 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1039 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1041 deltabase = chainbase = curr
1040 deltabase = chainbase = curr
1042 snapshotdepth = 0
1041 snapshotdepth = 0
1043 chainlen = 1
1042 chainlen = 1
1044
1043
1045 return _deltainfo(
1044 return _deltainfo(
1046 dist,
1045 dist,
1047 deltalen,
1046 deltalen,
1048 data,
1047 data,
1049 deltabase,
1048 deltabase,
1050 chainbase,
1049 chainbase,
1051 chainlen,
1050 chainlen,
1052 compresseddeltalen,
1051 compresseddeltalen,
1053 snapshotdepth,
1052 snapshotdepth,
1054 )
1053 )
1055
1054
1056 def finddeltainfo(self, revinfo, fh, excluded_bases=None):
1055 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1057 """Find an acceptable delta against a candidate revision
1056 """Find an acceptable delta against a candidate revision
1058
1057
1059 revinfo: information about the revision (instance of _revisioninfo)
1058 revinfo: information about the revision (instance of _revisioninfo)
1060 fh: file handle to either the .i or the .d revlog file,
1059 fh: file handle to either the .i or the .d revlog file,
1061 depending on whether it is inlined or not
1060 depending on whether it is inlined or not
1062
1061
1063 Returns the first acceptable candidate revision, as ordered by
1062 Returns the first acceptable candidate revision, as ordered by
1064 _candidategroups
1063 _candidategroups
1065
1064
1066 If no suitable deltabase is found, we return delta info for a full
1065 If no suitable deltabase is found, we return delta info for a full
1067 snapshot.
1066 snapshot.
1068
1067
1069 `excluded_bases` is an optional set of revision that cannot be used as
1068 `excluded_bases` is an optional set of revision that cannot be used as
1070 a delta base. Use this to recompute delta suitable in censor or strip
1069 a delta base. Use this to recompute delta suitable in censor or strip
1071 context.
1070 context.
1072 """
1071 """
1072 if target_rev is None:
1073 curr = len(self.revlog)
1074
1073 if not revinfo.textlen:
1075 if not revinfo.textlen:
1074 return self._fullsnapshotinfo(fh, revinfo)
1076 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1075
1077
1076 if excluded_bases is None:
1078 if excluded_bases is None:
1077 excluded_bases = set()
1079 excluded_bases = set()
1078
1080
1079 # no delta for flag processor revision (see "candelta" for why)
1081 # no delta for flag processor revision (see "candelta" for why)
1080 # not calling candelta since only one revision needs test, also to
1082 # not calling candelta since only one revision needs test, also to
1081 # avoid overhead fetching flags again.
1083 # avoid overhead fetching flags again.
1082 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1084 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1083 return self._fullsnapshotinfo(fh, revinfo)
1085 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1084
1086
1085 cachedelta = revinfo.cachedelta
1087 cachedelta = revinfo.cachedelta
1086 p1 = revinfo.p1
1088 p1 = revinfo.p1
1087 p2 = revinfo.p2
1089 p2 = revinfo.p2
1088 revlog = self.revlog
1090 revlog = self.revlog
1089
1091
1090 deltainfo = None
1092 deltainfo = None
1091 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1093 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1092 groups = _candidategroups(
1094 groups = _candidategroups(
1093 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1095 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1094 )
1096 )
1095 candidaterevs = next(groups)
1097 candidaterevs = next(groups)
1096 while candidaterevs is not None:
1098 while candidaterevs is not None:
1097 nominateddeltas = []
1099 nominateddeltas = []
1098 if deltainfo is not None:
1100 if deltainfo is not None:
1099 # if we already found a good delta,
1101 # if we already found a good delta,
1100 # challenge it against refined candidates
1102 # challenge it against refined candidates
1101 nominateddeltas.append(deltainfo)
1103 nominateddeltas.append(deltainfo)
1102 for candidaterev in candidaterevs:
1104 for candidaterev in candidaterevs:
1103 if candidaterev in excluded_bases:
1105 if candidaterev in excluded_bases:
1104 continue
1106 continue
1107 if candidaterev >= target_rev:
1108 continue
1105 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1109 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1106 if candidatedelta is not None:
1110 if candidatedelta is not None:
1107 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1111 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1108 nominateddeltas.append(candidatedelta)
1112 nominateddeltas.append(candidatedelta)
1109 if nominateddeltas:
1113 if nominateddeltas:
1110 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1114 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1111 if deltainfo is not None:
1115 if deltainfo is not None:
1112 candidaterevs = groups.send(deltainfo.base)
1116 candidaterevs = groups.send(deltainfo.base)
1113 else:
1117 else:
1114 candidaterevs = next(groups)
1118 candidaterevs = next(groups)
1115
1119
1116 if deltainfo is None:
1120 if deltainfo is None:
1117 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1121 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1118 return deltainfo
1122 return deltainfo
1119
1123
1120
1124
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.

    default_compression_header: first byte emitted by the revlog's default
        compressor, used to recognize payloads that can be stored in
        COMP_MODE_DEFAULT without an inline header.
    deltainfo: a `_deltainfo` whose `.data` is a (header, payload) pair as
        produced by `revlog.compress`; it may be replaced when the b'u'
        (uncompressed) marker is dropped.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # not data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        # no explicit header: inspect the payload's first byte
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now