##// END OF EJS Templates
revlog: add a way to blacklist some revisions when searching for a delta...
marmoute -
r48193:1efe3cde default
parent child Browse files
Show More
@@ -1,1106 +1,1115
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18 from ..pycompat import getattr
18 from ..pycompat import getattr
19
19
20 from .constants import (
20 from .constants import (
21 REVIDX_ISCENSORED,
21 REVIDX_ISCENSORED,
22 REVIDX_RAWTEXT_CHANGING_FLAGS,
22 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 )
23 )
24
24
25 from ..thirdparty import attr
25 from ..thirdparty import attr
26
26
27 from .. import (
27 from .. import (
28 error,
28 error,
29 mdiff,
29 mdiff,
30 util,
30 util,
31 )
31 )
32
32
33 from . import flagutil
33 from . import flagutil
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38
38
class _testrevlog(object):
    """A minimal in-memory stand-in for a revlog, used by the doctests."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is a list of cumulative revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and revision 0 start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
72
72
73
73
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision is larger than this limit, it will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(revlog._srmingapsize, targetsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available: fall back to the pure-python one
        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    threshold = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    for chunk in densityslicing(revs, threshold, mingap):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
146
146
147
147
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole chunk already fits within the read guarantee: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget and still a snapshot: extend current chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and restart one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot revision: switch to binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits the budget
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # yield whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
301
301
302
302
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be split any further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # span too small to contain a gap worth skipping
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # density already acceptable: read everything in one go
    if density >= targetdensity:
        yield revs
        return

    # Collect (size, index) of the gaps between consecutive revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort ascending so that pop() below removes the largest remaining gap
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the data actually read from disk,
        # improving the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
427
427
428
428
429 def _trimchunk(revlog, revs, startidx, endidx=None):
429 def _trimchunk(revlog, revs, startidx, endidx=None):
430 """returns revs[startidx:endidx] without empty trailing revs
430 """returns revs[startidx:endidx] without empty trailing revs
431
431
432 Doctest Setup
432 Doctest Setup
433 >>> revlog = _testrevlog([
433 >>> revlog = _testrevlog([
434 ... 5, #0
434 ... 5, #0
435 ... 10, #1
435 ... 10, #1
436 ... 12, #2
436 ... 12, #2
437 ... 12, #3 (empty)
437 ... 12, #3 (empty)
438 ... 17, #4
438 ... 17, #4
439 ... 21, #5
439 ... 21, #5
440 ... 21, #6 (empty)
440 ... 21, #6 (empty)
441 ... ])
441 ... ])
442
442
443 Contiguous cases:
443 Contiguous cases:
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
445 [0, 1, 2, 3, 4, 5]
445 [0, 1, 2, 3, 4, 5]
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
447 [0, 1, 2, 3, 4]
447 [0, 1, 2, 3, 4]
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
449 [0, 1, 2]
449 [0, 1, 2]
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
451 [2]
451 [2]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
453 [3, 4, 5]
453 [3, 4, 5]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
455 [3, 4]
455 [3, 4]
456
456
457 Discontiguous cases:
457 Discontiguous cases:
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
459 [1, 3, 5]
459 [1, 3, 5]
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
461 [1]
461 [1]
462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
463 [3, 5]
463 [3, 5]
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
465 [3, 5]
465 [3, 5]
466 """
466 """
467 length = revlog.length
467 length = revlog.length
468
468
469 if endidx is None:
469 if endidx is None:
470 endidx = len(revs)
470 endidx = len(revs)
471
471
472 # If we have a non-emtpy delta candidate, there are nothing to trim
472 # If we have a non-emtpy delta candidate, there are nothing to trim
473 if revs[endidx - 1] < len(revlog):
473 if revs[endidx - 1] < len(revlog):
474 # Trim empty revs at the end, except the very first revision of a chain
474 # Trim empty revs at the end, except the very first revision of a chain
475 while (
475 while (
476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
477 ):
477 ):
478 endidx -= 1
478 endidx -= 1
479
479
480 return revs[startidx:endidx]
480 return revs[startidx:endidx]
481
481
482
482
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # span from the first byte of the first revision to the last byte of the
    # last revision; an empty segment spans nothing
    if revs:
        return revlog.end(revs[-1]) - revlog.start(revs[0])
    return 0
511
511
512
512
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    Applies ``delta`` to the text of ``baserev`` (read through file handle
    ``fh``), runs the raw flag processors, and verifies the result hashes to
    ``expectednode`` (with parents ``p1``/``p2``) when required.

    Raises ``error.StorageError`` when ``flags`` carries REVIDX_ISCENSORED but
    the computed text turns out not to be censored, and re-raises
    ``error.CensoredNodeError`` unless the censored flag is set.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: the payload after the header IS the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # reaching this point without a CensoredNodeError means the text is
        # not actually censored, contradicting the flag
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
542
542
543
543
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable description of a candidate delta and its chain cost."""

    # distance from the base revision; bounds the amount of I/O needed
    distance = attr.ib()
    # length of the stored (compressed) delta, header included
    deltalen = attr.ib()
    # (header, data) pair as produced by revlog.compress()
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain, this one included
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this revision is a snapshot, None otherwise
    snapshotdepth = attr.ib()
554
554
555
555
def drop_u_compression(delta):
    """Strip the b'u' (uncompressed) marker from a delta's payload.

    This is useful for revlog formats that have a better compression
    method, where the explicit "uncompressed" header byte is unnecessary.
    Returns a new ``_deltainfo`` with an empty header and a payload one
    byte shorter.
    """
    header, payload = delta.data
    assert header == b'u', header
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', payload),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
572
572
573
573
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Tell whether ``deltainfo`` is an acceptable delta for ``revinfo``.

    "Good" means the delta stays within the disk-span, size, and
    chain-length bounds that we know to be performant.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    text_size = revinfo.textlen
    max_span = revlog._maxdeltachainspan
    if not max_span:
        max_span = deltainfo.distance  # ensure the conditional passes
    max_span = max(max_span, text_size * 4)

    # Bad delta from read span:
    #
    # The span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issues related to the span of data. In theory, one could
    # build a pathological revlog where the delta pattern would lead to too
    # many reads, but that does not happen in practice, so we skip the span
    # check entirely.
    if not revlog._sparserevlog and max_span < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target text itself is inefficient.
    if text_size < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The whole chain must stay below K * target text length.
    if text_size * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # The limit exists to prevent endless chains of intermediate deltas
    # from being created.
    snapdepth = deltainfo.snapshotdepth
    if snapdepth is not None and (text_size >> snapdepth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot is larger than the
    # previous snapshot it builds upon.
    if snapdepth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
645
645
646
646
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
651
651
652
652
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine-style generator: each yielded value is a tuple of
    candidate base revisions (or None once exhausted), and the caller uses
    ``send()`` to report which candidate (if any) produced a good delta so
    that the underlying ``_refinedgroups`` generator can refine its search.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # revisions already considered (nullrev is handled as a last resort)
    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
744
744
745
745
def _findsnapshots(revlog, cache, start_rev):
    """Collect snapshot revisions from ``start_rev`` to tip.

    ``cache`` is a mapping of delta-parent rev -> list of snapshot revs,
    filled in place. Uses the index's native implementation when one is
    available, otherwise falls back to a pure Python walk.
    """
    index = revlog.index
    if util.safehasattr(index, b'findsnapshots'):
        # fast path provided by the index implementation itself
        index.findsnapshots(cache, start_rev)
        return
    is_snapshot = revlog.issnapshot
    delta_parent = revlog.deltaparent
    for rev in revlog.revs(start_rev):
        if is_snapshot(rev):
            cache[delta_parent(rev)].append(rev)
756
756
757
757
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield groups of candidate delta bases, refined from caller feedback.

    Coroutine-style generator: each ``yield`` emits a tuple of candidate
    revisions (or None when the search is exhausted) and receives, via
    ``send()``, the revision the caller settled on (or None if none fit).
    That feedback drives the snapshot refinement phase below.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the snapshot's own delta base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshot's children as bases
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
805
805
806
806
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # we reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer chances
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
926
926
927
927
928 class deltacomputer(object):
928 class deltacomputer(object):
    def __init__(self, revlog):
        # the revlog this computer builds deltas for
        self.revlog = revlog
931
931
932 def buildtext(self, revinfo, fh):
932 def buildtext(self, revinfo, fh):
933 """Builds a fulltext version of a revision
933 """Builds a fulltext version of a revision
934
934
935 revinfo: revisioninfo instance that contains all needed info
935 revinfo: revisioninfo instance that contains all needed info
936 fh: file handle to either the .i or the .d revlog file,
936 fh: file handle to either the .i or the .d revlog file,
937 depending on whether it is inlined or not
937 depending on whether it is inlined or not
938 """
938 """
939 btext = revinfo.btext
939 btext = revinfo.btext
940 if btext[0] is not None:
940 if btext[0] is not None:
941 return btext[0]
941 return btext[0]
942
942
943 revlog = self.revlog
943 revlog = self.revlog
944 cachedelta = revinfo.cachedelta
944 cachedelta = revinfo.cachedelta
945 baserev = cachedelta[0]
945 baserev = cachedelta[0]
946 delta = cachedelta[1]
946 delta = cachedelta[1]
947
947
948 fulltext = btext[0] = _textfromdelta(
948 fulltext = btext[0] = _textfromdelta(
949 fh,
949 fh,
950 revlog,
950 revlog,
951 baserev,
951 baserev,
952 delta,
952 delta,
953 revinfo.p1,
953 revinfo.p1,
954 revinfo.p2,
954 revinfo.p2,
955 revinfo.flags,
955 revinfo.flags,
956 revinfo.node,
956 revinfo.node,
957 )
957 )
958 return fulltext
958 return fulltext
959
959
960 def _builddeltadiff(self, base, revinfo, fh):
960 def _builddeltadiff(self, base, revinfo, fh):
961 revlog = self.revlog
961 revlog = self.revlog
962 t = self.buildtext(revinfo, fh)
962 t = self.buildtext(revinfo, fh)
963 if revlog.iscensored(base):
963 if revlog.iscensored(base):
964 # deltas based on a censored revision must replace the
964 # deltas based on a censored revision must replace the
965 # full content in one patch, so delta works everywhere
965 # full content in one patch, so delta works everywhere
966 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
966 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
967 delta = header + t
967 delta = header + t
968 else:
968 else:
969 ptext = revlog.rawdata(base, _df=fh)
969 ptext = revlog.rawdata(base, _df=fh)
970 delta = mdiff.textdiff(ptext, t)
970 delta = mdiff.textdiff(ptext, t)
971
971
972 return delta
972 return delta
973
973
    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` describing a delta of ``revinfo`` against ``base``.

        Returns None when the estimated lower bound on the delta size
        already disqualifies ``base`` as an intermediate snapshot base.
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, deltas can only apply against the
            # chain base
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still apply
            currentbase = cachebase
            # walk up over empty deltas: they do not change the text, so
            # the cached diff still applies if we end up on ``base``
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshotdepth needs to be neither None nor a 0-level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # estimated compression lower bound for the delta
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            # absolute constraint: too large for this snapshot depth
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            # relative constraint: larger than the base it builds upon
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1032
1032
1033 def _fullsnapshotinfo(self, fh, revinfo):
1033 def _fullsnapshotinfo(self, fh, revinfo):
1034 curr = len(self.revlog)
1034 curr = len(self.revlog)
1035 rawtext = self.buildtext(revinfo, fh)
1035 rawtext = self.buildtext(revinfo, fh)
1036 data = self.revlog.compress(rawtext)
1036 data = self.revlog.compress(rawtext)
1037 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1037 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1038 deltabase = chainbase = curr
1038 deltabase = chainbase = curr
1039 snapshotdepth = 0
1039 snapshotdepth = 0
1040 chainlen = 1
1040 chainlen = 1
1041
1041
1042 return _deltainfo(
1042 return _deltainfo(
1043 dist,
1043 dist,
1044 deltalen,
1044 deltalen,
1045 data,
1045 data,
1046 deltabase,
1046 deltabase,
1047 chainbase,
1047 chainbase,
1048 chainlen,
1048 chainlen,
1049 compresseddeltalen,
1049 compresseddeltalen,
1050 snapshotdepth,
1050 snapshotdepth,
1051 )
1051 )
1052
1052
1053 def finddeltainfo(self, revinfo, fh):
1053 def finddeltainfo(self, revinfo, fh, excluded_bases=None):
1054 """Find an acceptable delta against a candidate revision
1054 """Find an acceptable delta against a candidate revision
1055
1055
1056 revinfo: information about the revision (instance of _revisioninfo)
1056 revinfo: information about the revision (instance of _revisioninfo)
1057 fh: file handle to either the .i or the .d revlog file,
1057 fh: file handle to either the .i or the .d revlog file,
1058 depending on whether it is inlined or not
1058 depending on whether it is inlined or not
1059
1059
1060 Returns the first acceptable candidate revision, as ordered by
1060 Returns the first acceptable candidate revision, as ordered by
1061 _candidategroups
1061 _candidategroups
1062
1062
1063 If no suitable deltabase is found, we return delta info for a full
1063 If no suitable deltabase is found, we return delta info for a full
1064 snapshot.
1064 snapshot.
1065
1066 `excluded_bases` is an optional set of revision that cannot be used as
1067 a delta base. Use this to recompute delta suitable in censor or strip
1068 context.
1065 """
1069 """
1066 if not revinfo.textlen:
1070 if not revinfo.textlen:
1067 return self._fullsnapshotinfo(fh, revinfo)
1071 return self._fullsnapshotinfo(fh, revinfo)
1068
1072
1073 if excluded_bases is None:
1074 excluded_bases = set()
1075
1069 # no delta for flag processor revision (see "candelta" for why)
1076 # no delta for flag processor revision (see "candelta" for why)
1070 # not calling candelta since only one revision needs test, also to
1077 # not calling candelta since only one revision needs test, also to
1071 # avoid overhead fetching flags again.
1078 # avoid overhead fetching flags again.
1072 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1079 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1073 return self._fullsnapshotinfo(fh, revinfo)
1080 return self._fullsnapshotinfo(fh, revinfo)
1074
1081
1075 cachedelta = revinfo.cachedelta
1082 cachedelta = revinfo.cachedelta
1076 p1 = revinfo.p1
1083 p1 = revinfo.p1
1077 p2 = revinfo.p2
1084 p2 = revinfo.p2
1078 revlog = self.revlog
1085 revlog = self.revlog
1079
1086
1080 deltainfo = None
1087 deltainfo = None
1081 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1088 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1082 groups = _candidategroups(
1089 groups = _candidategroups(
1083 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1090 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1084 )
1091 )
1085 candidaterevs = next(groups)
1092 candidaterevs = next(groups)
1086 while candidaterevs is not None:
1093 while candidaterevs is not None:
1087 nominateddeltas = []
1094 nominateddeltas = []
1088 if deltainfo is not None:
1095 if deltainfo is not None:
1089 # if we already found a good delta,
1096 # if we already found a good delta,
1090 # challenge it against refined candidates
1097 # challenge it against refined candidates
1091 nominateddeltas.append(deltainfo)
1098 nominateddeltas.append(deltainfo)
1092 for candidaterev in candidaterevs:
1099 for candidaterev in candidaterevs:
1100 if candidaterev in excluded_bases:
1101 continue
1093 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1102 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1094 if candidatedelta is not None:
1103 if candidatedelta is not None:
1095 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1104 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1096 nominateddeltas.append(candidatedelta)
1105 nominateddeltas.append(candidatedelta)
1097 if nominateddeltas:
1106 if nominateddeltas:
1098 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1107 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1099 if deltainfo is not None:
1108 if deltainfo is not None:
1100 candidaterevs = groups.send(deltainfo.base)
1109 candidaterevs = groups.send(deltainfo.base)
1101 else:
1110 else:
1102 candidaterevs = next(groups)
1111 candidaterevs = next(groups)
1103
1112
1104 if deltainfo is None:
1113 if deltainfo is None:
1105 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1114 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1106 return deltainfo
1115 return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now