upstream/mercurial-mirror Commit - r39529:3ca144f1

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import collections

12

import heapq

13

import heapq

13

import struct

14

import struct

14

15

# import stuff from node for others to import from revlog

16

# import stuff from node for others to import from revlog

16

from ..node import (

17

from ..node import (

17

nullrev,

18

nullrev,

18

)

19

)

19

from ..i18n import _

20

from ..i18n import _

20

21

from .constants import (

22

from .constants import (

22

REVIDX_ISCENSORED,

23

REVIDX_ISCENSORED,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

)

25

)

25

26

from ..thirdparty import (

27

from ..thirdparty import (

27

attr,

28

attr,

28

)

29

)

29

30

from .. import (

31

from .. import (

31

error,

32

error,

32

mdiff,

33

mdiff,

33

)

34

)

34

35

RevlogError = error.RevlogError

36

RevlogError = error.RevlogError

36

CensoredNodeError = error.CensoredNodeError

37

CensoredNodeError = error.CensoredNodeError

37

38

# maximum <delta-chain-data>/<revision-text-length> ratio

39

# maximum <delta-chain-data>/<revision-text-length> ratio

39

LIMIT_DELTA2TEXT = 2

40

LIMIT_DELTA2TEXT = 2

40

41

class _testrevlog(object):

42

class _testrevlog(object):

42

"""minimalist fake revlog to use in doctests"""

43

"""minimalist fake revlog to use in doctests"""

43

44

def __init__(self, data, density=0.5, mingap=0):

45

def __init__(self, data, density=0.5, mingap=0):

45

"""data is an list of revision payload boundaries"""

46

"""data is an list of revision payload boundaries"""

46

self._data = data

47

self._data = data

47

self._srdensitythreshold = density

48

self._srdensitythreshold = density

48

self._srmingapsize = mingap

49

self._srmingapsize = mingap

49

50

def start(self, rev):

51

def start(self, rev):

51

if rev == 0:

52

if rev == 0:

52

return 0

53

return 0

53

return self._data[rev - 1]

54

return self._data[rev - 1]

54

55

def end(self, rev):

56

def end(self, rev):

56

return self._data[rev]

57

return self._data[rev]

57

58

def length(self, rev):

59

def length(self, rev):

59

return self.end(rev) - self.start(rev)

60

return self.end(rev) - self.start(rev)

60

61

def __len__(self):

62

def __len__(self):

62

return len(self._data)

63

return len(self._data)

63

64

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):

65

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):

65

"""slice revs to reduce the amount of unrelated data to be read from disk.

66

"""slice revs to reduce the amount of unrelated data to be read from disk.

66

67

``revs`` is sliced into groups that should be read in one time.

68

``revs`` is sliced into groups that should be read in one time.

68

Assume that revs are sorted.

69

Assume that revs are sorted.

69

70

The initial chunk is sliced until the overall density (payload/chunks-span

71

The initial chunk is sliced until the overall density (payload/chunks-span

71

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

72

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

72

`revlog._srmingapsize` is skipped.

73

`revlog._srmingapsize` is skipped.

73

74

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

75

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

75

For consistency with other slicing choice, this limit won't go lower than

76

For consistency with other slicing choice, this limit won't go lower than

76

`revlog._srmingapsize`.

77

`revlog._srmingapsize`.

77

78

If individual revisions chunk are larger than this limit, they will still

79

If individual revisions chunk are larger than this limit, they will still

79

be raised individually.

80

be raised individually.

80

81

>>> revlog = _testrevlog([

82

>>> revlog = _testrevlog([

82

... 5, #00 (5)

83

... 5, #00 (5)

83

... 10, #01 (5)

84

... 10, #01 (5)

84

... 12, #02 (2)

85

... 12, #02 (2)

85

... 12, #03 (empty)

86

... 12, #03 (empty)

86

... 27, #04 (15)

87

... 27, #04 (15)

87

... 31, #05 (4)

88

... 31, #05 (4)

88

... 31, #06 (empty)

89

... 31, #06 (empty)

89

... 42, #07 (11)

90

... 42, #07 (11)

90

... 47, #08 (5)

91

... 47, #08 (5)

91

... 47, #09 (empty)

92

... 47, #09 (empty)

92

... 48, #10 (1)

93

... 48, #10 (1)

93

... 51, #11 (3)

94

... 51, #11 (3)

94

... 74, #12 (23)

95

... 74, #12 (23)

95

... 85, #13 (11)

96

... 85, #13 (11)

96

... 86, #14 (1)

97

... 86, #14 (1)

97

... 91, #15 (5)

98

... 91, #15 (5)

98

... ])

99

... ])

99

100

>>> list(slicechunk(revlog, list(range(16))))

101

>>> list(slicechunk(revlog, list(range(16))))

101

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

102

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

102

>>> list(slicechunk(revlog, [0, 15]))

103

>>> list(slicechunk(revlog, [0, 15]))

103

[[0], [15]]

104

[[0], [15]]

104

>>> list(slicechunk(revlog, [0, 11, 15]))

105

>>> list(slicechunk(revlog, [0, 11, 15]))

105

[[0], [11], [15]]

106

[[0], [11], [15]]

106

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

107

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

107

[[0], [11, 13, 15]]

108

[[0], [11, 13, 15]]

108

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

109

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

109

[[1, 2], [5, 8, 10, 11], [14]]

110

[[1, 2], [5, 8, 10, 11], [14]]

110

111

Slicing with a maximum chunk size

112

Slicing with a maximum chunk size

112

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

113

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

113

[[0], [11], [13], [15]]

114

[[0], [11], [13], [15]]

114

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

115

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

115

[[0], [11], [13, 15]]

116

[[0], [11], [13, 15]]

116

"""

117

"""

117

if targetsize is not None:

118

if targetsize is not None:

118

targetsize = max(targetsize, revlog._srmingapsize)

119

targetsize = max(targetsize, revlog._srmingapsize)

119

# targetsize should not be specified when evaluating delta candidates:

120

# targetsize should not be specified when evaluating delta candidates:

120

# * targetsize is used to ensure we stay within specification when reading,

121

# * targetsize is used to ensure we stay within specification when reading,

121

# * deltainfo is used to pick are good delta chain when writing.

122

# * deltainfo is used to pick are good delta chain when writing.

122

if not (deltainfo is None or targetsize is None):

123

if not (deltainfo is None or targetsize is None):

123

msg = 'cannot use `targetsize` with a `deltainfo`'

124

msg = 'cannot use `targetsize` with a `deltainfo`'

124

raise error.ProgrammingError(msg)

125

raise error.ProgrammingError(msg)

125

for chunk in _slicechunktodensity(revlog, revs,

126

for chunk in _slicechunktodensity(revlog, revs,

126

deltainfo,

127

deltainfo,

127

revlog._srdensitythreshold,

128

revlog._srdensitythreshold,

128

revlog._srmingapsize):

129

revlog._srmingapsize):

129

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

130

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

130

yield subchunk

131

yield subchunk

131

132

def _slicechunktosize(revlog, revs, targetsize=None):

133

def _slicechunktosize(revlog, revs, targetsize=None):

133

"""slice revs to match the target size

134

"""slice revs to match the target size

134

135

This is intended to be used on chunk that density slicing selected by that

136

This is intended to be used on chunk that density slicing selected by that

136

are still too large compared to the read garantee of revlog. This might

137

are still too large compared to the read garantee of revlog. This might

137

happens when "minimal gap size" interrupted the slicing or when chain are

138

happens when "minimal gap size" interrupted the slicing or when chain are

138

built in a way that create large blocks next to each other.

139

built in a way that create large blocks next to each other.

139

140

>>> revlog = _testrevlog([

141

>>> revlog = _testrevlog([

141

... 3, #0 (3)

142

... 3, #0 (3)

142

... 5, #1 (2)

143

... 5, #1 (2)

143

... 6, #2 (1)

144

... 6, #2 (1)

144

... 8, #3 (2)

145

... 8, #3 (2)

145

... 8, #4 (empty)

146

... 8, #4 (empty)

146

... 11, #5 (3)

147

... 11, #5 (3)

147

... 12, #6 (1)

148

... 12, #6 (1)

148

... 13, #7 (1)

149

... 13, #7 (1)

149

... 14, #8 (1)

150

... 14, #8 (1)

150

... ])

151

... ])

151

152

Cases where chunk is already small enough

153

Cases where chunk is already small enough

153

>>> list(_slicechunktosize(revlog, [0], 3))

154

>>> list(_slicechunktosize(revlog, [0], 3))

154

[[0]]

155

[[0]]

155

>>> list(_slicechunktosize(revlog, [6, 7], 3))

156

>>> list(_slicechunktosize(revlog, [6, 7], 3))

156

[[6, 7]]

157

[[6, 7]]

157

>>> list(_slicechunktosize(revlog, [0], None))

158

>>> list(_slicechunktosize(revlog, [0], None))

158

[[0]]

159

[[0]]

159

>>> list(_slicechunktosize(revlog, [6, 7], None))

160

>>> list(_slicechunktosize(revlog, [6, 7], None))

160

[[6, 7]]

161

[[6, 7]]

161

162

cases where we need actual slicing

163

cases where we need actual slicing

163

>>> list(_slicechunktosize(revlog, [0, 1], 3))

164

>>> list(_slicechunktosize(revlog, [0, 1], 3))

164

[[0], [1]]

165

[[0], [1]]

165

>>> list(_slicechunktosize(revlog, [1, 3], 3))

166

>>> list(_slicechunktosize(revlog, [1, 3], 3))

166

[[1], [3]]

167

[[1], [3]]

167

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

168

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

168

[[1, 2], [3]]

169

[[1, 2], [3]]

169

>>> list(_slicechunktosize(revlog, [3, 5], 3))

170

>>> list(_slicechunktosize(revlog, [3, 5], 3))

170

[[3], [5]]

171

[[3], [5]]

171

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

172

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

172

[[3], [5]]

173

[[3], [5]]

173

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

174

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

174

[[5], [6, 7, 8]]

175

[[5], [6, 7, 8]]

175

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

176

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

176

[[0], [1, 2], [3], [5], [6, 7, 8]]

177

[[0], [1, 2], [3], [5], [6, 7, 8]]

177

178

Case with too large individual chunk (must return valid chunk)

179

Case with too large individual chunk (must return valid chunk)

179

>>> list(_slicechunktosize(revlog, [0, 1], 2))

180

>>> list(_slicechunktosize(revlog, [0, 1], 2))

180

[[0], [1]]

181

[[0], [1]]

181

>>> list(_slicechunktosize(revlog, [1, 3], 1))

182

>>> list(_slicechunktosize(revlog, [1, 3], 1))

182

[[1], [3]]

183

[[1], [3]]

183

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

184

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

184

[[3], [5]]

185

[[3], [5]]

185

"""

186

"""

186

assert targetsize is None or 0 <= targetsize

187

assert targetsize is None or 0 <= targetsize

187

if targetsize is None or segmentspan(revlog, revs) <= targetsize:

188

if targetsize is None or segmentspan(revlog, revs) <= targetsize:

188

yield revs

189

yield revs

189

return

190

return

190

191

startrevidx = 0

192

startrevidx = 0

192

startdata = revlog.start(revs[0])

193

startdata = revlog.start(revs[0])

193

endrevidx = 0

194

endrevidx = 0

194

iterrevs = enumerate(revs)

195

iterrevs = enumerate(revs)

195

next(iterrevs) # skip first rev.

196

next(iterrevs) # skip first rev.

196

for idx, r in iterrevs:

197

for idx, r in iterrevs:

197

span = revlog.end(r) - startdata

198

span = revlog.end(r) - startdata

198

if span <= targetsize:

199

if span <= targetsize:

199

endrevidx = idx

200

endrevidx = idx

200

else:

201

else:

201

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)

202

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)

202

if chunk:

203

if chunk:

203

yield chunk

204

yield chunk

204

startrevidx = idx

205

startrevidx = idx

205

startdata = revlog.start(r)

206

startdata = revlog.start(r)

206

endrevidx = idx

207

endrevidx = idx

207

yield _trimchunk(revlog, revs, startrevidx)

208

yield _trimchunk(revlog, revs, startrevidx)

208

209

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,

210

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,

210

mingapsize=0):

211

mingapsize=0):

211

"""slice revs to reduce the amount of unrelated data to be read from disk.

212

"""slice revs to reduce the amount of unrelated data to be read from disk.

212

213

``revs`` is sliced into groups that should be read in one time.

214

``revs`` is sliced into groups that should be read in one time.

214

Assume that revs are sorted.

215

Assume that revs are sorted.

215

216

``deltainfo`` is a _deltainfo instance of a revision that we would append

217

``deltainfo`` is a _deltainfo instance of a revision that we would append

217

to the top of the revlog.

218

to the top of the revlog.

218

219

The initial chunk is sliced until the overall density (payload/chunks-span

220

The initial chunk is sliced until the overall density (payload/chunks-span

220

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

221

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

221

skipped.

222

skipped.

222

223

>>> revlog = _testrevlog([

224

>>> revlog = _testrevlog([

224

... 5, #00 (5)

225

... 5, #00 (5)

225

... 10, #01 (5)

226

... 10, #01 (5)

226

... 12, #02 (2)

227

... 12, #02 (2)

227

... 12, #03 (empty)

228

... 12, #03 (empty)

228

... 27, #04 (15)

229

... 27, #04 (15)

229

... 31, #05 (4)

230

... 31, #05 (4)

230

... 31, #06 (empty)

231

... 31, #06 (empty)

231

... 42, #07 (11)

232

... 42, #07 (11)

232

... 47, #08 (5)

233

... 47, #08 (5)

233

... 47, #09 (empty)

234

... 47, #09 (empty)

234

... 48, #10 (1)

235

... 48, #10 (1)

235

... 51, #11 (3)

236

... 51, #11 (3)

236

... 74, #12 (23)

237

... 74, #12 (23)

237

... 85, #13 (11)

238

... 85, #13 (11)

238

... 86, #14 (1)

239

... 86, #14 (1)

239

... 91, #15 (5)

240

... 91, #15 (5)

240

... ])

241

... ])

241

242

>>> list(_slicechunktodensity(revlog, list(range(16))))

243

>>> list(_slicechunktodensity(revlog, list(range(16))))

243

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

244

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

244

>>> list(_slicechunktodensity(revlog, [0, 15]))

245

>>> list(_slicechunktodensity(revlog, [0, 15]))

245

[[0], [15]]

246

[[0], [15]]

246

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

247

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

247

[[0], [11], [15]]

248

[[0], [11], [15]]

248

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

249

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

249

[[0], [11, 13, 15]]

250

[[0], [11, 13, 15]]

250

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

251

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

251

[[1, 2], [5, 8, 10, 11], [14]]

252

[[1, 2], [5, 8, 10, 11], [14]]

252

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

253

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

253

... mingapsize=20))

254

... mingapsize=20))

254

[[1, 2, 3, 5, 8, 10, 11], [14]]

255

[[1, 2, 3, 5, 8, 10, 11], [14]]

255

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

256

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

256

... targetdensity=0.95))

257

... targetdensity=0.95))

257

[[1, 2], [5], [8, 10, 11], [14]]

258

[[1, 2], [5], [8, 10, 11], [14]]

258

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

259

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

259

... targetdensity=0.95, mingapsize=12))

260

... targetdensity=0.95, mingapsize=12))

260

[[1, 2], [5, 8, 10, 11], [14]]

261

[[1, 2], [5, 8, 10, 11], [14]]

261

"""

262

"""

262

start = revlog.start

263

start = revlog.start

263

length = revlog.length

264

length = revlog.length

264

265

if len(revs) <= 1:

266

if len(revs) <= 1:

266

yield revs

267

yield revs

267

return

268

return

268

269

nextrev = len(revlog)

270

nextrev = len(revlog)

270

nextoffset = revlog.end(nextrev - 1)

271

nextoffset = revlog.end(nextrev - 1)

271

272

if deltainfo is None:

273

if deltainfo is None:

273

deltachainspan = segmentspan(revlog, revs)

274

deltachainspan = segmentspan(revlog, revs)

274

chainpayload = sum(length(r) for r in revs)

275

chainpayload = sum(length(r) for r in revs)

275

else:

276

else:

276

deltachainspan = deltainfo.distance

277

deltachainspan = deltainfo.distance

277

chainpayload = deltainfo.compresseddeltalen

278

chainpayload = deltainfo.compresseddeltalen

278

279

if deltachainspan < mingapsize:

280

if deltachainspan < mingapsize:

280

yield revs

281

yield revs

281

return

282

return

282

283

readdata = deltachainspan

284

readdata = deltachainspan

284

285

if deltachainspan:

286

if deltachainspan:

286

density = chainpayload / float(deltachainspan)

287

density = chainpayload / float(deltachainspan)

287

else:

288

else:

288

density = 1.0

289

density = 1.0

289

290

if density >= targetdensity:

291

if density >= targetdensity:

291

yield revs

292

yield revs

292

return

293

return

293

294

if deltainfo is not None and deltainfo.deltalen:

295

if deltainfo is not None and deltainfo.deltalen:

295

revs = list(revs)

296

revs = list(revs)

296

revs.append(nextrev)

297

revs.append(nextrev)

297

298

# Store the gaps in a heap to have them sorted by decreasing size

299

# Store the gaps in a heap to have them sorted by decreasing size

299

gapsheap = []

300

gapsheap = []

300

heapq.heapify(gapsheap)

301

heapq.heapify(gapsheap)

301

prevend = None

302

prevend = None

302

for i, rev in enumerate(revs):

303

for i, rev in enumerate(revs):

303

if rev < nextrev:

304

if rev < nextrev:

304

revstart = start(rev)

305

revstart = start(rev)

305

revlen = length(rev)

306

revlen = length(rev)

306

else:

307

else:

307

revstart = nextoffset

308

revstart = nextoffset

308

revlen = deltainfo.deltalen

309

revlen = deltainfo.deltalen

309

310

# Skip empty revisions to form larger holes

311

# Skip empty revisions to form larger holes

311

if revlen == 0:

312

if revlen == 0:

312

continue

313

continue

313

314

if prevend is not None:

315

if prevend is not None:

315

gapsize = revstart - prevend

316

gapsize = revstart - prevend

316

# only consider holes that are large enough

317

# only consider holes that are large enough

317

if gapsize > mingapsize:

318

if gapsize > mingapsize:

318

heapq.heappush(gapsheap, (-gapsize, i))

319

heapq.heappush(gapsheap, (-gapsize, i))

319

320

prevend = revstart + revlen

321

prevend = revstart + revlen

321

322

# Collect the indices of the largest holes until the density is acceptable

323

# Collect the indices of the largest holes until the density is acceptable

323

indicesheap = []

324

indicesheap = []

324

heapq.heapify(indicesheap)

325

heapq.heapify(indicesheap)

325

while gapsheap and density < targetdensity:

326

while gapsheap and density < targetdensity:

326

oppgapsize, gapidx = heapq.heappop(gapsheap)

327

oppgapsize, gapidx = heapq.heappop(gapsheap)

327

328

heapq.heappush(indicesheap, gapidx)

329

heapq.heappush(indicesheap, gapidx)

329

330

# the gap sizes are stored as negatives to be sorted decreasingly

331

# the gap sizes are stored as negatives to be sorted decreasingly

331

# by the heap

332

# by the heap

332

readdata -= (-oppgapsize)

333

readdata -= (-oppgapsize)

333

if readdata > 0:

334

if readdata > 0:

334

density = chainpayload / float(readdata)

335

density = chainpayload / float(readdata)

335

else:

336

else:

336

density = 1.0

337

density = 1.0

337

338

# Cut the revs at collected indices

339

# Cut the revs at collected indices

339

previdx = 0

340

previdx = 0

340

while indicesheap:

341

while indicesheap:

341

idx = heapq.heappop(indicesheap)

342

idx = heapq.heappop(indicesheap)

342

343

chunk = _trimchunk(revlog, revs, previdx, idx)

344

chunk = _trimchunk(revlog, revs, previdx, idx)

344

if chunk:

345

if chunk:

345

yield chunk

346

yield chunk

346

347

previdx = idx

348

previdx = idx

348

349

chunk = _trimchunk(revlog, revs, previdx)

350

chunk = _trimchunk(revlog, revs, previdx)

350

if chunk:

351

if chunk:

351

yield chunk

352

yield chunk

352

353

def _trimchunk(revlog, revs, startidx, endidx=None):

354

def _trimchunk(revlog, revs, startidx, endidx=None):

354

"""returns revs[startidx:endidx] without empty trailing revs

355

"""returns revs[startidx:endidx] without empty trailing revs

355

356

Doctest Setup

357

Doctest Setup

357

>>> revlog = _testrevlog([

358

>>> revlog = _testrevlog([

358

... 5, #0

359

... 5, #0

359

... 10, #1

360

... 10, #1

360

... 12, #2

361

... 12, #2

361

... 12, #3 (empty)

362

... 12, #3 (empty)

362

... 17, #4

363

... 17, #4

363

... 21, #5

364

... 21, #5

364

... 21, #6 (empty)

365

... 21, #6 (empty)

365

... ])

366

... ])

366

367

Contiguous cases:

368

Contiguous cases:

368

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

369

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

369

[0, 1, 2, 3, 4, 5]

370

[0, 1, 2, 3, 4, 5]

370

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

371

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

371

[0, 1, 2, 3, 4]

372

[0, 1, 2, 3, 4]

372

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

373

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

373

[0, 1, 2]

374

[0, 1, 2]

374

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

375

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

375

[2]

376

[2]

376

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

377

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

377

[3, 4, 5]

378

[3, 4, 5]

378

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

379

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

379

[3, 4]

380

[3, 4]

380

381

Discontiguous cases:

382

Discontiguous cases:

382

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

383

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

383

[1, 3, 5]

384

[1, 3, 5]

384

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

385

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

385

[1]

386

[1]

386

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

387

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

387

[3, 5]

388

[3, 5]

388

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

389

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

389

[3, 5]

390

[3, 5]

390

"""

391

"""

391

length = revlog.length

392

length = revlog.length

392

393

if endidx is None:

394

if endidx is None:

394

endidx = len(revs)

395

endidx = len(revs)

395

396

# If we have a non-emtpy delta candidate, there are nothing to trim

397

# If we have a non-emtpy delta candidate, there are nothing to trim

397

if revs[endidx - 1] < len(revlog):

398

if revs[endidx - 1] < len(revlog):

398

# Trim empty revs at the end, except the very first revision of a chain

399

# Trim empty revs at the end, except the very first revision of a chain

399

while (endidx > 1

400

while (endidx > 1

400

and endidx > startidx

401

and endidx > startidx

401

and length(revs[endidx - 1]) == 0):

402

and length(revs[endidx - 1]) == 0):

402

endidx -= 1

403

endidx -= 1

403

404

return revs[startidx:endidx]

405

return revs[startidx:endidx]

405

406

def segmentspan(revlog, revs, deltainfo=None):

407

def segmentspan(revlog, revs, deltainfo=None):

407

"""Get the byte span of a segment of revisions

408

"""Get the byte span of a segment of revisions

408

409

revs is a sorted array of revision numbers

410

revs is a sorted array of revision numbers

410

411

>>> revlog = _testrevlog([

412

>>> revlog = _testrevlog([

412

... 5, #0

413

... 5, #0

413

... 10, #1

414

... 10, #1

414

... 12, #2

415

... 12, #2

415

... 12, #3 (empty)

416

... 12, #3 (empty)

416

... 17, #4

417

... 17, #4

417

... ])

418

... ])

418

419

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

420

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

420

17

421

17

421

>>> segmentspan(revlog, [0, 4])

422

>>> segmentspan(revlog, [0, 4])

422

17

423

17

423

>>> segmentspan(revlog, [3, 4])

424

>>> segmentspan(revlog, [3, 4])

424

5

425

5

425

>>> segmentspan(revlog, [1, 2, 3,])

426

>>> segmentspan(revlog, [1, 2, 3,])

426

7

427

7

427

>>> segmentspan(revlog, [1, 3])

428

>>> segmentspan(revlog, [1, 3])

428

7

429

7

429

"""

430

"""

430

if not revs:

431

if not revs:

431

return 0

432

return 0

432

if deltainfo is not None and len(revlog) <= revs[-1]:

433

if deltainfo is not None and len(revlog) <= revs[-1]:

433

if len(revs) == 1:

434

if len(revs) == 1:

434

return deltainfo.deltalen

435

return deltainfo.deltalen

435

offset = revlog.end(len(revlog) - 1)

436

offset = revlog.end(len(revlog) - 1)

436

end = deltainfo.deltalen + offset

437

end = deltainfo.deltalen + offset

437

else:

438

else:

438

end = revlog.end(revs[-1])

439

end = revlog.end(revs[-1])

439

return end - revlog.start(revs[0])

440

return end - revlog.start(revs[0])

440

441

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

442

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

442

"""build full text from a (base, delta) pair and other metadata"""

443

"""build full text from a (base, delta) pair and other metadata"""

443

# special case deltas which replace entire base; no need to decode

444

# special case deltas which replace entire base; no need to decode

444

# base revision. this neatly avoids censored bases, which throw when

445

# base revision. this neatly avoids censored bases, which throw when

445

# they're decoded.

446

# they're decoded.

446

hlen = struct.calcsize(">lll")

447

hlen = struct.calcsize(">lll")

447

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

448

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

448

len(delta) - hlen):

449

len(delta) - hlen):

449

fulltext = delta[hlen:]

450

fulltext = delta[hlen:]

450

else:

451

else:

451

# deltabase is rawtext before changed by flag processors, which is

452

# deltabase is rawtext before changed by flag processors, which is

452

# equivalent to non-raw text

453

# equivalent to non-raw text

453

basetext = revlog.revision(baserev, _df=fh, raw=False)

454

basetext = revlog.revision(baserev, _df=fh, raw=False)

454

fulltext = mdiff.patch(basetext, delta)

455

fulltext = mdiff.patch(basetext, delta)

455

456

try:

457

try:

457

res = revlog._processflags(fulltext, flags, 'read', raw=True)

458

res = revlog._processflags(fulltext, flags, 'read', raw=True)

458

fulltext, validatehash = res

459

fulltext, validatehash = res

459

if validatehash:

460

if validatehash:

460

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

461

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

461

if flags & REVIDX_ISCENSORED:

462

if flags & REVIDX_ISCENSORED:

462

raise RevlogError(_('node %s is not censored') % expectednode)

463

raise RevlogError(_('node %s is not censored') % expectednode)

463

except CensoredNodeError:

464

except CensoredNodeError:

464

# must pass the censored index flag to add censored revisions

465

# must pass the censored index flag to add censored revisions

465

if not flags & REVIDX_ISCENSORED:

466

if not flags & REVIDX_ISCENSORED:

466

raise

467

raise

467

return fulltext

468

return fulltext

468

469

@attr.s(slots=True, frozen=True)

470

@attr.s(slots=True, frozen=True)

470

class _deltainfo(object):

471

class _deltainfo(object):

471

distance = attr.ib()

472

distance = attr.ib()

472

deltalen = attr.ib()

473

deltalen = attr.ib()

473

data = attr.ib()

474

data = attr.ib()

474

base = attr.ib()

475

base = attr.ib()

475

chainbase = attr.ib()

476

chainbase = attr.ib()

476

chainlen = attr.ib()

477

chainlen = attr.ib()

477

compresseddeltalen = attr.ib()

478

compresseddeltalen = attr.ib()

478

snapshotdepth = attr.ib()

479

snapshotdepth = attr.ib()

479

480

def isgooddeltainfo(revlog, deltainfo, revinfo):

481

def isgooddeltainfo(revlog, deltainfo, revinfo):

481

"""Returns True if the given delta is good. Good means that it is within

482

"""Returns True if the given delta is good. Good means that it is within

482

the disk span, disk size, and chain length bounds that we know to be

483

the disk span, disk size, and chain length bounds that we know to be

483

performant."""

484

performant."""

484

if deltainfo is None:

485

if deltainfo is None:

485

return False

486

return False

486

487

# - 'deltainfo.distance' is the distance from the base revision --

488

# - 'deltainfo.distance' is the distance from the base revision --

488

# bounding it limits the amount of I/O we need to do.

489

# bounding it limits the amount of I/O we need to do.

489

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

490

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

490

# deltas we need to apply -- bounding it limits the amount of CPU

491

# deltas we need to apply -- bounding it limits the amount of CPU

491

# we consume.

492

# we consume.

492

493

if revlog._sparserevlog:

494

if revlog._sparserevlog:

494

# As sparse-read will be used, we can consider that the distance,

495

# As sparse-read will be used, we can consider that the distance,

495

# instead of being the span of the whole chunk,

496

# instead of being the span of the whole chunk,

496

# is the span of the largest read chunk

497

# is the span of the largest read chunk

497

base = deltainfo.base

498

base = deltainfo.base

498

499

if base != nullrev:

500

if base != nullrev:

500

deltachain = revlog._deltachain(base)[0]

501

deltachain = revlog._deltachain(base)[0]

501

else:

502

else:

502

deltachain = []

503

deltachain = []

503

504

# search for the first non-snapshot revision

505

# search for the first non-snapshot revision

505

for idx, r in enumerate(deltachain):

506

for idx, r in enumerate(deltachain):

506

if not revlog.issnapshot(r):

507

if not revlog.issnapshot(r):

507

break

508

break

508

deltachain = deltachain[idx:]

509

deltachain = deltachain[idx:]

509

chunks = slicechunk(revlog, deltachain, deltainfo)

510

chunks = slicechunk(revlog, deltachain, deltainfo)

510

all_span = [segmentspan(revlog, revs, deltainfo)

511

all_span = [segmentspan(revlog, revs, deltainfo)

511

for revs in chunks]

512

for revs in chunks]

512

distance = max(all_span)

513

distance = max(all_span)

513

else:

514

else:

514

distance = deltainfo.distance

515

distance = deltainfo.distance

515

516

textlen = revinfo.textlen

517

textlen = revinfo.textlen

517

defaultmax = textlen * 4

518

defaultmax = textlen * 4

518

maxdist = revlog._maxdeltachainspan

519

maxdist = revlog._maxdeltachainspan

519

if not maxdist:

520

if not maxdist:

520

maxdist = distance # ensure the conditional pass

521

maxdist = distance # ensure the conditional pass

521

maxdist = max(maxdist, defaultmax)

522

maxdist = max(maxdist, defaultmax)

522

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

523

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

523

# In multiple place, we are ignoring irrelevant data range below a

524

# In multiple place, we are ignoring irrelevant data range below a

524

# certain size. Be also apply this tradeoff here and relax span

525

# certain size. Be also apply this tradeoff here and relax span

525

# constraint for small enought content.

526

# constraint for small enought content.

526

maxdist = revlog._srmingapsize

527

maxdist = revlog._srmingapsize

527

528

# Bad delta from read span:

529

# Bad delta from read span:

529

#

530

#

530

# If the span of data read is larger than the maximum allowed.

531

# If the span of data read is larger than the maximum allowed.

531

if maxdist < distance:

532

if maxdist < distance:

532

return False

533

return False

533

534

# Bad delta from new delta size:

535

# Bad delta from new delta size:

535

#

536

#

536

# If the delta size is larger than the target text, storing the

537

# If the delta size is larger than the target text, storing the

537

# delta will be inefficient.

538

# delta will be inefficient.

538

if textlen < deltainfo.deltalen:

539

if textlen < deltainfo.deltalen:

539

return False

540

return False

540

541

# Bad delta from cumulated payload size:

542

# Bad delta from cumulated payload size:

542

#

543

#

543

# If the sum of delta get larger than K * target text length.

544

# If the sum of delta get larger than K * target text length.

544

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

545

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

545

return False

546

return False

546

547

# Bad delta from chain length:

548

# Bad delta from chain length:

548

#

549

#

549

# If the number of delta in the chain gets too high.

550

# If the number of delta in the chain gets too high.

550

if (revlog._maxchainlen

551

if (revlog._maxchainlen

551

and revlog._maxchainlen < deltainfo.chainlen):

552

and revlog._maxchainlen < deltainfo.chainlen):

552

return False

553

return False

553

554

# bad delta from intermediate snapshot size limit

555

# bad delta from intermediate snapshot size limit

555

#

556

#

556

# If an intermediate snapshot size is higher than the limit. The

557

# If an intermediate snapshot size is higher than the limit. The

557

# limit exist to prevent endless chain of intermediate delta to be

558

# limit exist to prevent endless chain of intermediate delta to be

558

# created.

559

# created.

559

if (deltainfo.snapshotdepth is not None and

560

if (deltainfo.snapshotdepth is not None and

560

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

561

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

561

return False

562

return False

562

563

# bad delta if new intermediate snapshot is larger than the previous

564

# bad delta if new intermediate snapshot is larger than the previous

564

# snapshot

565

# snapshot

565

if (deltainfo.snapshotdepth

566

if (deltainfo.snapshotdepth

566

and revlog.length(deltainfo.base) < deltainfo.deltalen):

567

and revlog.length(deltainfo.base) < deltainfo.deltalen):

567

return False

568

return False

568

569

return True

570

return True

570

571

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

572

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

572

"""Provides group of revision to be tested as delta base

573

"""Provides group of revision to be tested as delta base

573

574

This top level function focus on emitting groups with unique and worthwhile

575

This top level function focus on emitting groups with unique and worthwhile

575

content. See _raw_candidate_groups for details about the group order.

576

content. See _raw_candidate_groups for details about the group order.

576

"""

577

"""

577

# should we try to build a delta?

578

# should we try to build a delta?

578

if not (len(revlog) and revlog._storedeltachains):

579

if not (len(revlog) and revlog._storedeltachains):

579

return

580

return

580

581

deltalength = revlog.length

582

deltalength = revlog.length

582

deltaparent = revlog.deltaparent

583

deltaparent = revlog.deltaparent

583

584

deltas_limit = textlen * LIMIT_DELTA2TEXT

585

deltas_limit = textlen * LIMIT_DELTA2TEXT

585

586

tested = set([nullrev])

587

tested = set([nullrev])

587

for temptative in _rawgroups(revlog, p1, p2, cachedelta):

588

for temptative in _rawgroups(revlog, p1, p2, cachedelta):

588

group = []

589

group = []

589

for rev in temptative:

590

for rev in temptative:

590

# skip over empty delta (no need to include them in a chain)

591

# skip over empty delta (no need to include them in a chain)

591

while not (rev == nullrev or rev in tested or deltalength(rev)):

592

while not (rev == nullrev or rev in tested or deltalength(rev)):

592

rev = deltaparent(rev)

593

rev = deltaparent(rev)

593

tested.add(rev)

594

tested.add(rev)

594

# filter out revision we tested already

595

# filter out revision we tested already

595

if rev in tested:

596

if rev in tested:

596

continue

597

continue

597

tested.add(rev)

598

tested.add(rev)

598

# filter out delta base that will never produce good delta

599

# filter out delta base that will never produce good delta

599

if deltas_limit < revlog.length(rev):

600

if deltas_limit < revlog.length(rev):

600

continue

601

continue

601

# no need to try a delta against nullrev, this will be done as a

602

# no need to try a delta against nullrev, this will be done as a

602

# last resort.

603

# last resort.

603

if rev == nullrev:

604

if rev == nullrev:

604

continue

605

continue

605

# no delta for rawtext-changing revs (see "candelta" for why)

606

# no delta for rawtext-changing revs (see "candelta" for why)

606

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

607

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

607

continue

608

continue

608

group.append(rev)

609

group.append(rev)

609

if group:

610

if group:

611

# XXX: in the sparse revlog case, group can become large,

612

# impacting performances. Some bounding or slicing mecanism

613

# would help to reduce this impact.

610

yield tuple(group)

614

yield tuple(group)

611

615

616

def _findsnapshots(revlog, cache, start_rev):

617

"""find snapshot from start_rev to tip"""

618

deltaparent = revlog.deltaparent

619

for rev in revlog.revs(start_rev):

620

if deltaparent(rev) == nullrev:

621

cache[nullrev].append(rev)

622

612

def _rawgroups(revlog, p1, p2, cachedelta):

623

def _rawgroups(revlog, p1, p2, cachedelta):

613

"""Provides group of revision to be tested as delta base

624

"""Provides group of revision to be tested as delta base

614

625

615

This lower level function focus on emitting delta theorically interresting

626

This lower level function focus on emitting delta theorically interresting

616

without looking it any practical details.

627

without looking it any practical details.

617

628

618

The group order aims at providing fast or small candidates first.

629

The group order aims at providing fast or small candidates first.

619

"""

630

"""

620

gdelta = revlog._generaldelta

631

gdelta = revlog._generaldelta

621

sparse = revlog._sparserevlog

632

sparse = revlog._sparserevlog

622

curr = len(revlog)

633

curr = len(revlog)

623

prev = curr - 1

634

prev = curr - 1

624

deltachain = lambda rev: revlog._deltachain(rev)[0]

635

deltachain = lambda rev: revlog._deltachain(rev)[0]

625

636

626

# First we try to reuse a the delta contained in the bundle.

637

# First we try to reuse a the delta contained in the bundle.

627

# (or from the source revlog)

638

# (or from the source revlog)

628

#

639

#

629

# This logic only applies to general delta repositories and can be disabled

640

# This logic only applies to general delta repositories and can be disabled

630

# through configuration. Disabling reuse of source delta is useful when

641

# through configuration. Disabling reuse of source delta is useful when

631

# we want to make sure we recomputed "optimal" deltas.

642

# we want to make sure we recomputed "optimal" deltas.

632

if cachedelta and gdelta and revlog._lazydeltabase:

643

if cachedelta and gdelta and revlog._lazydeltabase:

633

# Assume what we received from the server is a good choice

644

# Assume what we received from the server is a good choice

634

# build delta will reuse the cache

645

# build delta will reuse the cache

635

yield (cachedelta[0],)

646

yield (cachedelta[0],)

636

647

637

if gdelta:

648

if gdelta:

638

# exclude already lazy tested base if any

649

# exclude already lazy tested base if any

639

parents = [p for p in (p1, p2) if p != nullrev]

650

parents = [p for p in (p1, p2) if p != nullrev]

640

651

641

if not revlog._deltabothparents and len(parents) == 2:

652

if not revlog._deltabothparents and len(parents) == 2:

642

parents.sort()

653

parents.sort()

643

# To minimize the chance of having to build a fulltext,

654

# To minimize the chance of having to build a fulltext,

644

# pick first whichever parent is closest to us (max rev)

655

# pick first whichever parent is closest to us (max rev)

645

yield (parents[1],)

656

yield (parents[1],)

646

# then the other one (min rev) if the first did not fit

657

# then the other one (min rev) if the first did not fit

647

yield (parents[0],)

658

yield (parents[0],)

648

elif len(parents) > 0:

659

elif len(parents) > 0:

649

# Test all parents (1 or 2), and keep the best candidate

660

# Test all parents (1 or 2), and keep the best candidate

650

yield parents

661

yield parents

651

662

652

if sparse and parents:

663

if sparse and parents:

653

# See if we can use an existing snapshot in the parent chains to use as

664

# See if we can use an existing snapshot in the parent chains to use as

654

# a base for a new intermediate-snapshot

665

# a base for a new intermediate-snapshot

655

bases = []

666

bases = []

656

for p in parents:

667

for p in parents:

657

bases.append(deltachain(p)[0])

668

bases.append(deltachain(p)[0])

658

yield tuple(sorted(bases))

669

yield tuple(sorted(bases))

670

# No suitable base found in the parent chain, search if any full

671

# snapshots emitted since parent's base would be a suitable base for an

672

# intermediate snapshot.

673

#

674

# It give a chance to reuse a delta chain unrelated to the current

675

# revisions instead of starting our own. Without such re-use,

676

# topological branches would keep reopening new full chains. Creating

677

# more and more snapshot as the repository grow.

678

snapfloor = min(bases) + 1

679

snapshots = collections.defaultdict(list)

680

_findsnapshots(revlog, snapshots, snapfloor)

681

yield tuple(snapshots[nullrev])

659

682

660

# other approach failed try against prev to hopefully save us a

683

# other approach failed try against prev to hopefully save us a

661

# fulltext.

684

# fulltext.

662

yield (prev,)

685

yield (prev,)

663

686

664

class deltacomputer(object):

687

class deltacomputer(object):

665

def __init__(self, revlog):

688

def __init__(self, revlog):

666

self.revlog = revlog

689

self.revlog = revlog

667

690

668

def buildtext(self, revinfo, fh):

691

def buildtext(self, revinfo, fh):

669

"""Builds a fulltext version of a revision

692

"""Builds a fulltext version of a revision

670

693

671

revinfo: _revisioninfo instance that contains all needed info

694

revinfo: _revisioninfo instance that contains all needed info

672

fh: file handle to either the .i or the .d revlog file,

695

fh: file handle to either the .i or the .d revlog file,

673

depending on whether it is inlined or not

696

depending on whether it is inlined or not

674

"""

697

"""

675

btext = revinfo.btext

698

btext = revinfo.btext

676

if btext[0] is not None:

699

if btext[0] is not None:

677

return btext[0]

700

return btext[0]

678

701

679

revlog = self.revlog

702

revlog = self.revlog

680

cachedelta = revinfo.cachedelta

703

cachedelta = revinfo.cachedelta

681

baserev = cachedelta[0]

704

baserev = cachedelta[0]

682

delta = cachedelta[1]

705

delta = cachedelta[1]

683

706

684

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

707

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

685

revinfo.p1, revinfo.p2,

708

revinfo.p1, revinfo.p2,

686

revinfo.flags, revinfo.node)

709

revinfo.flags, revinfo.node)

687

return fulltext

710

return fulltext

688

711

689

def _builddeltadiff(self, base, revinfo, fh):

712

def _builddeltadiff(self, base, revinfo, fh):

690

revlog = self.revlog

713

revlog = self.revlog

691

t = self.buildtext(revinfo, fh)

714

t = self.buildtext(revinfo, fh)

692

if revlog.iscensored(base):

715

if revlog.iscensored(base):

693

# deltas based on a censored revision must replace the

716

# deltas based on a censored revision must replace the

694

# full content in one patch, so delta works everywhere

717

# full content in one patch, so delta works everywhere

695

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

718

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

696

delta = header + t

719

delta = header + t

697

else:

720

else:

698

ptext = revlog.revision(base, _df=fh, raw=True)

721

ptext = revlog.revision(base, _df=fh, raw=True)

699

delta = mdiff.textdiff(ptext, t)

722

delta = mdiff.textdiff(ptext, t)

700

723

701

return delta

724

return delta

702

725

703

def _builddeltainfo(self, revinfo, base, fh):

726

def _builddeltainfo(self, revinfo, base, fh):

704

# can we use the cached delta?

727

# can we use the cached delta?

705

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

728

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

706

delta = revinfo.cachedelta[1]

729

delta = revinfo.cachedelta[1]

707

else:

730

else:

708

delta = self._builddeltadiff(base, revinfo, fh)

731

delta = self._builddeltadiff(base, revinfo, fh)

709

revlog = self.revlog

732

revlog = self.revlog

710

header, data = revlog.compress(delta)

733

header, data = revlog.compress(delta)

711

deltalen = len(header) + len(data)

734

deltalen = len(header) + len(data)

712

chainbase = revlog.chainbase(base)

735

chainbase = revlog.chainbase(base)

713

offset = revlog.end(len(revlog) - 1)

736

offset = revlog.end(len(revlog) - 1)

714

dist = deltalen + offset - revlog.start(chainbase)

737

dist = deltalen + offset - revlog.start(chainbase)

715

if revlog._generaldelta:

738

if revlog._generaldelta:

716

deltabase = base

739

deltabase = base

717

else:

740

else:

718

deltabase = chainbase

741

deltabase = chainbase

719

chainlen, compresseddeltalen = revlog._chaininfo(base)

742

chainlen, compresseddeltalen = revlog._chaininfo(base)

720

chainlen += 1

743

chainlen += 1

721

compresseddeltalen += deltalen

744

compresseddeltalen += deltalen

722

745

723

revlog = self.revlog

746

revlog = self.revlog

724

snapshotdepth = None

747

snapshotdepth = None

725

if deltabase == nullrev:

748

if deltabase == nullrev:

726

snapshotdepth = 0

749

snapshotdepth = 0

727

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

750

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

728

# A delta chain should always be one full snapshot,

751

# A delta chain should always be one full snapshot,

729

# zero or more semi-snapshots, and zero or more deltas

752

# zero or more semi-snapshots, and zero or more deltas

730

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

753

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

731

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

754

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

732

snapshotdepth = len(revlog._deltachain(deltabase)[0])

755

snapshotdepth = len(revlog._deltachain(deltabase)[0])

733

756

734

return _deltainfo(dist, deltalen, (header, data), deltabase,

757

return _deltainfo(dist, deltalen, (header, data), deltabase,

735

chainbase, chainlen, compresseddeltalen,

758

chainbase, chainlen, compresseddeltalen,

736

snapshotdepth)

759

snapshotdepth)

737

760

738

def _fullsnapshotinfo(self, fh, revinfo):

761

def _fullsnapshotinfo(self, fh, revinfo):

739

curr = len(self.revlog)

762

curr = len(self.revlog)

740

rawtext = self.buildtext(revinfo, fh)

763

rawtext = self.buildtext(revinfo, fh)

741

data = self.revlog.compress(rawtext)

764

data = self.revlog.compress(rawtext)

742

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

765

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

743

deltabase = chainbase = curr

766

deltabase = chainbase = curr

744

snapshotdepth = 0

767

snapshotdepth = 0

745

chainlen = 1

768

chainlen = 1

746

769

747

return _deltainfo(dist, deltalen, data, deltabase,

770

return _deltainfo(dist, deltalen, data, deltabase,

748

chainbase, chainlen, compresseddeltalen,

771

chainbase, chainlen, compresseddeltalen,

749

snapshotdepth)

772

snapshotdepth)

750

773

751

def finddeltainfo(self, revinfo, fh):

774

def finddeltainfo(self, revinfo, fh):

752

"""Find an acceptable delta against a candidate revision

775

"""Find an acceptable delta against a candidate revision

753

776

754

revinfo: information about the revision (instance of _revisioninfo)

777

revinfo: information about the revision (instance of _revisioninfo)

755

fh: file handle to either the .i or the .d revlog file,

778

fh: file handle to either the .i or the .d revlog file,

756

depending on whether it is inlined or not

779

depending on whether it is inlined or not

757

780

758

Returns the first acceptable candidate revision, as ordered by

781

Returns the first acceptable candidate revision, as ordered by

759

_candidategroups

782

_candidategroups

760

783

761

If no suitable deltabase is found, we return delta info for a full

784

If no suitable deltabase is found, we return delta info for a full

762

snapshot.

785

snapshot.

763

"""

786

"""

764

if not revinfo.textlen:

787

if not revinfo.textlen:

765

return self._fullsnapshotinfo(fh, revinfo)

788

return self._fullsnapshotinfo(fh, revinfo)

766

789

767

# no delta for flag processor revision (see "candelta" for why)

790

# no delta for flag processor revision (see "candelta" for why)

768

# not calling candelta since only one revision needs test, also to

791

# not calling candelta since only one revision needs test, also to

769

# avoid overhead fetching flags again.

792

# avoid overhead fetching flags again.

770

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

793

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

771

return self._fullsnapshotinfo(fh, revinfo)

794

return self._fullsnapshotinfo(fh, revinfo)

772

795

773

cachedelta = revinfo.cachedelta

796

cachedelta = revinfo.cachedelta

774

p1 = revinfo.p1

797

p1 = revinfo.p1

775

p2 = revinfo.p2

798

p2 = revinfo.p2

776

revlog = self.revlog

799

revlog = self.revlog

777

800

778

deltainfo = None

801

deltainfo = None

779

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

802

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

780

groups = _candidategroups(self.revlog, revinfo.textlen,

803

groups = _candidategroups(self.revlog, revinfo.textlen,

781

p1r, p2r, cachedelta)

804

p1r, p2r, cachedelta)

782

for candidaterevs in groups:

805

for candidaterevs in groups:

783

nominateddeltas = []

806

nominateddeltas = []

784

for candidaterev in candidaterevs:

807

for candidaterev in candidaterevs:

785

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

808

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

786

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

809

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

787

nominateddeltas.append(candidatedelta)

810

nominateddeltas.append(candidatedelta)

788

if nominateddeltas:

811

if nominateddeltas:

789

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

812

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

790

break

813

break

791

814

792

if deltainfo is None:

815

if deltainfo is None:

793

deltainfo = self._fullsnapshotinfo(fh, revinfo)

816

deltainfo = self._fullsnapshotinfo(fh, revinfo)

794

return deltainfo

817

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
+            import collections
             import heapq
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
             )
             RevlogError = error.RevlogError
             CensoredNodeError = error.CensoredNodeError
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                 def start(self, rev):
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
             def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.
                 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
                 For consistency with other slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.
                 If individual revisions chunk are larger than this limit, they will still
                 be raised individually.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 Slicing with a maximum chunk size
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 """
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)
                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when reading,
                 # * deltainfo is used to pick are good delta chain when writing.
                 if not (deltainfo is None or targetsize is None):
                     msg = 'cannot use `targetsize` with a `deltainfo`'
                     raise error.ProgrammingError(msg)
                 for chunk in _slicechunktodensity(revlog, revs,
                                                   deltainfo,
                                                   revlog._srdensitythreshold,
                                                   revlog._srmingapsize):
                     for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                         yield subchunk
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> revlog = _testrevlog([
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ])
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 startdata = revlog.start(revs[0])
                 endrevidx = 0
                 iterrevs = enumerate(revs)
                 next(iterrevs) # skip first rev.
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     if span <= targetsize:
                         endrevidx = idx
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx
                 yield _trimchunk(revlog, revs, startrevidx)
             def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                                      mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 ``deltainfo`` is a _deltainfo instance of a revision that we would append
                 to the top of the revlog.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 nextrev = len(revlog)
                 nextoffset = revlog.end(nextrev - 1)
                 if deltainfo is None:
                     deltachainspan = segmentspan(revlog, revs)
                     chainpayload = sum(length(r) for r in revs)
                 else:
                     deltachainspan = deltainfo.distance
                     chainpayload = deltainfo.compresseddeltalen
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 if deltainfo is not None and deltainfo.deltalen:
                     revs = list(revs)
                     revs.append(nextrev)
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gapsheap = []
                 heapq.heapify(gapsheap)
                 prevend = None
                 for i, rev in enumerate(revs):
                     if rev < nextrev:
                         revstart = start(rev)
                         revlen = length(rev)
                     else:
                         revstart = nextoffset
                         revlen = deltainfo.deltalen
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             heapq.heappush(gapsheap, (-gapsize, i))
                     prevend = revstart + revlen
                 # Collect the indices of the largest holes until the density is acceptable
                 indicesheap = []
                 heapq.heapify(indicesheap)
                 while gapsheap and density < targetdensity:
                     oppgapsize, gapidx = heapq.heappop(gapsheap)
                     heapq.heappush(indicesheap, gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= (-oppgapsize)
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 # Cut the revs at collected indices
                 previdx = 0
                 while indicesheap:
                     idx = heapq.heappop(indicesheap)
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (endidx > 1
                             and endidx > startidx
                             and length(revs[endidx - 1]) == 0):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs, deltainfo=None):
                 """Get the byte span of a segment of revisions
                 revs is a sorted array of revision numbers
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])
                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 >>> segmentspan(revlog, [0, 4])
                 >>> segmentspan(revlog, [3, 4])
                 >>> segmentspan(revlog, [1, 2, 3,])
                 >>> segmentspan(revlog, [1, 3])
                 """
                 if not revs:
                     return 0
                 if deltainfo is not None and len(revlog) <= revs[-1]:
                     if len(revs) == 1:
                         return deltainfo.deltalen
                     offset = revlog.end(len(revlog) - 1)
                     end = deltainfo.deltalen + offset
                 else:
                     end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata"""
                 # special case deltas which replace entire base; no need to decode
                 # base revision. this neatly avoids censored bases, which throw when
                 # they're decoded.
                 hlen = struct.calcsize(">lll")
                 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                            len(delta) - hlen):
                     fulltext = delta[hlen:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     fulltext = mdiff.patch(basetext, delta)
                 try:
                     res = revlog._processflags(fulltext, flags, 'read', raw=True)
                     fulltext, validatehash = res
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise RevlogError(_('node %s is not censored') % expectednode)
                 except CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 distance = attr.ib()
                 deltalen = attr.ib()
                 data = attr.ib()
                 base = attr.ib()
                 chainbase = attr.ib()
                 chainlen = attr.ib()
                 compresseddeltalen = attr.ib()
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant."""
                 if deltainfo is None:
                     return False
                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.
                 if revlog._sparserevlog:
                     # As sparse-read will be used, we can consider that the distance,
                     # instead of being the span of the whole chunk,
                     # is the span of the largest read chunk
                     base = deltainfo.base
                     if base != nullrev:
                         deltachain = revlog._deltachain(base)[0]
                     else:
                         deltachain = []
                     # search for the first non-snapshot revision
                     for idx, r in enumerate(deltachain):
                         if not revlog.issnapshot(r):
                             break
                     deltachain = deltachain[idx:]
                     chunks = slicechunk(revlog, deltachain, deltainfo)
                     all_span = [segmentspan(revlog, revs, deltainfo)
                                 for revs in chunks]
                     distance = max(all_span)
                 else:
                     distance = deltainfo.distance
                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = distance # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
                     # In multiple place, we are ignoring irrelevant data range below a
                     # certain size. Be also apply this tradeoff here and relax span
                     # constraint for small enought content.
                     maxdist = revlog._srmingapsize
                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 if maxdist < distance:
                     return False
                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False
                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False
                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (revlog._maxchainlen
                         and revlog._maxchainlen < deltainfo.chainlen):
                     return False
                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (deltainfo.snapshotdepth is not None and
                         (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
                     return False
                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (deltainfo.snapshotdepth
                         and revlog.length(deltainfo.base) < deltainfo.deltalen):
                     return False
                 return True
             def _candidategroups(revlog, textlen, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _raw_candidate_groups for details about the group order.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     return
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
                 tested = set([nullrev])
                 for temptative in _rawgroups(revlog, p1, p2, cachedelta):
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while not (rev == nullrev or rev in tested or deltalength(rev)):
                             rev = deltaparent(rev)
                             tested.add(rev)
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         tested.add(rev)
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             continue
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             continue
                         group.append(rev)
                     if group:
+                        # XXX: in the sparse revlog case, group can become large,
+                        #      impacting performances. Some bounding or slicing mecanism
+                        #      would help to reduce this impact.
                         yield tuple(group)
+            def _findsnapshots(revlog, cache, start_rev):
+                """find snapshot from start_rev to tip"""
+                deltaparent = revlog.deltaparent
+                for rev in revlog.revs(start_rev):
+                    if deltaparent(rev) == nullrev:
+                        cache[nullrev].append(rev)
             def _rawgroups(revlog, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 gdelta = revlog._generaldelta
                 sparse = revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 # First we try to reuse a the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse of source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 if cachedelta and gdelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     yield (cachedelta[0],)
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 if sparse and parents:
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     bases = []
                     for p in parents:
                         bases.append(deltachain(p)[0])
                     yield tuple(sorted(bases))
+                    # No suitable base found in the parent chain, search if any full
+                    # snapshots emitted since parent's base would be a suitable base for an
+                    # intermediate snapshot.
+                    #
+                    # It give a chance to reuse a delta chain unrelated to the current
+                    # revisions instead of starting our own. Without such re-use,
+                    # topological branches would keep reopening new full chains. Creating
+                    # more and more snapshot as the repository grow.
+                    snapfloor = min(bases) + 1
+                    snapshots = collections.defaultdict(list)
+                    _findsnapshots(revlog, snapshots, snapfloor)
+                    yield tuple(snapshots[nullrev])
                 # other approach failed try against prev to hopefully save us a
                 # fulltext.
                 yield (prev,)
             class deltacomputer(object):
                 def __init__(self, revlog):
                     self.revlog = revlog
                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision
                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return fulltext
                 def _builddeltadiff(self, base, revinfo, fh):
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.revision(base, _df=fh, raw=True)
                         delta = mdiff.textdiff(ptext, t)
                     return delta
                 def _builddeltainfo(self, revinfo, base, fh):
                     # can we use the cached delta?
                     if revinfo.cachedelta and revinfo.cachedelta[0] == base:
                         delta = revinfo.cachedelta[1]
                     else:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     revlog = self.revlog
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     chainbase = revlog.chainbase(base)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     revlog = self.revlog
                     snapshotdepth = None
                     if deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def _fullsnapshotinfo(self, fh, revinfo):
                     curr = len(self.revlog)
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(dist, deltalen, data, deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     """
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
                     deltainfo = None
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     groups = _candidategroups(self.revlog, revinfo.textlen,
                                                          p1r, p2r, cachedelta)
                     for candidaterevs in groups:
                         nominateddeltas = []
                         for candidaterev in candidaterevs:
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                 nominateddeltas.append(candidatedelta)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                             break
                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo

             ====================================
             Test delta choice with sparse revlog
             ====================================
             Sparse-revlog usually shows the most gain on Manifest. However, it is simpler
             to general an appropriate file, so we test with a single file instead. The
             goal is to observe intermediate snapshot being created.
             We need a large enough file. Part of the content needs to be replaced
             repeatedly while some of it changes rarely.
               $ bundlepath="$TESTDIR/artifacts/cache/big-file-churn.hg"
               $ expectedhash=`cat "$bundlepath".md5`
               $ if [ ! -f "$bundlepath" ]; then
               >     echo 'skipped: missing artifact, run "'"$TESTDIR"'/artifacts/scripts/generate-churning-bundle.py"'
               >     exit 80
               > fi
               $ currenthash=`f -M "$bundlepath" | cut -d = -f 2`
               $ if [ "$currenthash" != "$expectedhash" ]; then
               >     echo 'skipped: outdated artifact, md5 "'"$currenthash"'" expected "'"$expectedhash"'" run "'"$TESTDIR"'/artifacts/scripts/generate-churning-bundle.py"'
               >     exit 80
               > fi
               $ cat >> $HGRCPATH << EOF
               > [format]
               > sparse-revlog = yes
               > [storage]
               > revlog.optimize-delta-parent-choice = yes
               > EOF
               $ hg init sparse-repo
               $ cd sparse-repo
               $ hg unbundle $bundlepath
               adding changesets
               adding manifests
               adding file changes
               added 5001 changesets with 5001 changes to 1 files (+89 heads)
               new changesets 9706f5af64f4:d9032adc8114 (5001 drafts)
               (run 'hg heads' to see heads, 'hg merge' to merge)
               $ hg up
 files updated, 0 files merged, 0 files removed, 0 files unresolved
               updated to "d9032adc8114: commit #5000"
 other heads for branch "default"
               $ hg log --stat -r 0:3
               changeset:   0:9706f5af64f4
               user:        test
               date:        Thu Jan 01 00:00:00 1970 +0000
               summary:     initial commit
                SPARSE-REVLOG-TEST-FILE |  10500 ++++++++++++++++++++++++++++++++++++++++++++++
 files changed, 10500 insertions(+), 0 deletions(-)
               changeset:   1:724907deaa5e
               user:        test
               date:        Thu Jan 01 00:00:00 1970 +0000
               summary:     commit #1
                SPARSE-REVLOG-TEST-FILE |  1068 +++++++++++++++++++++++-----------------------
 files changed, 534 insertions(+), 534 deletions(-)
               changeset:   2:62c41bce3e5d
               user:        test
               date:        Thu Jan 01 00:00:00 1970 +0000
               summary:     commit #2
                SPARSE-REVLOG-TEST-FILE |  1068 +++++++++++++++++++++++-----------------------
 files changed, 534 insertions(+), 534 deletions(-)
               changeset:   3:348a9cbd6959
               user:        test
               date:        Thu Jan 01 00:00:00 1970 +0000
               summary:     commit #3
                SPARSE-REVLOG-TEST-FILE |  1068 +++++++++++++++++++++++-----------------------
 files changed, 534 insertions(+), 534 deletions(-)
               $ f -s .hg/store/data/*.d
-              .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=72315280
+              .hg/store/data/_s_p_a_r_s_e-_r_e_v_l_o_g-_t_e_s_t-_f_i_l_e.d: size=67810463
               $ hg debugrevlog *
               format : 1
               flags  : generaldelta
               revisions     :     5001
                   merges    :      625 (12.50%)
                   normal    :     4376 (87.50%)
               revisions     :     5001
                   empty     :        0 ( 0.00%)
                                  text  :        0 (100.00%)
                                  delta :        0 (100.00%)
-                  snapshot  :      145 ( 2.90%)
+                  snapshot  :      126 ( 2.52%)
-                    lvl-0   :             15 ( 0.30%)
+                    lvl-0   :              4 ( 0.08%)
-                    lvl-1   :            130 ( 2.60%)
+                    lvl-1   :            120 ( 2.40%)
-                  deltas    :     4856 (97.10%)
+                    lvl-2   :              2 ( 0.04%)
-              revision size : 72315280
+                  deltas    :     4875 (97.48%)
-                  snapshot  : 18481085 (25.56%)
+              revision size : 67810463
-                    lvl-0   :        3016019 ( 4.17%)
+                  snapshot  : 14373347 (21.20%)
-                    lvl-1   :       15465066 (21.39%)
+                    lvl-0   :         804235 ( 1.19%)
-                  deltas    : 53834195 (74.44%)
+                    lvl-1   :       13535903 (19.96%)
+                    lvl-2   :          33209 ( 0.05%)
+                  deltas    : 53437116 (78.80%)
               chunks        :     5001
 x78 (x)  :     5001 (100.00%)
-              chunks size   : 72315280
+              chunks size   : 67810463
-x78 (x)  : 72315280 (100.00%)
+x78 (x)  : 67810463 (100.00%)
               avg chain length  :       18
               max chain length  :       45
-              max chain reach   : 32095083
+              max chain reach   : 25808240
-              compression ratio :       23
+              compression ratio :       25
               uncompressed data size (min/max/avg) : 346468 / 346472 / 346471
-              full revision size (min/max/avg)     : 200990 / 201151 / 201067
+              full revision size (min/max/avg)     : 201014 / 201116 / 201058
-              inter-snapshot size (min/max/avg)    : 37202 / 173034 / 118962
+              inter-snapshot size (min/max/avg)    : 11623 / 173150 / 111222
-                  level-1   (min/max/avg)          : 37202 / 173034 / 118962
+                  level-1   (min/max/avg)          : 11623 / 173150 / 112799
-              delta size (min/max/avg)             : 10649 / 104791 / 11086
+                  level-2   (min/max/avg)          : 14151 / 19058 / 16604
+              delta size (min/max/avg)             : 10649 / 101790 / 10961
-              deltas against prev  : 4185 (86.18%)
+              deltas against prev  : 4207 (86.30%)
-                  where prev = p1  : 4139     (98.90%)
+                  where prev = p1  : 4164     (98.98%)
                   where prev = p2  :    0     ( 0.00%)
-                  other            :   46     ( 1.10%)
+                  other            :   43     ( 1.02%)
-              deltas against p1    :  647 (13.32%)
+              deltas against p1    :  653 (13.39%)
-              deltas against p2    :   24 ( 0.49%)
+              deltas against p2    :   15 ( 0.31%)
               deltas against other :    0 ( 0.00%)