##// END OF EJS Templates
sparse-revlog: drop unused deltainfo parameter from segmentspan...
Boris Feld -
r40641:a32ccd32 default
parent child Browse files
Show More
@@ -1,902 +1,896 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import heapq
13 import heapq
14 import struct
14 import struct
15
15
16 # import stuff from node for others to import from revlog
16 # import stuff from node for others to import from revlog
17 from ..node import (
17 from ..node import (
18 nullrev,
18 nullrev,
19 )
19 )
20 from ..i18n import _
20 from ..i18n import _
21
21
22 from .constants import (
22 from .constants import (
23 REVIDX_ISCENSORED,
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
25 )
26
26
27 from ..thirdparty import (
27 from ..thirdparty import (
28 attr,
28 attr,
29 )
29 )
30
30
31 from .. import (
31 from .. import (
32 error,
32 error,
33 mdiff,
33 mdiff,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
39 class _testrevlog(object):
39 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
41
41
42 def __init__(self, data, density=0.5, mingap=0):
42 def __init__(self, data, density=0.5, mingap=0):
43 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
44 self._data = data
44 self._data = data
45 self._srdensitythreshold = density
45 self._srdensitythreshold = density
46 self._srmingapsize = mingap
46 self._srmingapsize = mingap
47
47
48 def start(self, rev):
48 def start(self, rev):
49 if rev == 0:
49 if rev == 0:
50 return 0
50 return 0
51 return self._data[rev - 1]
51 return self._data[rev - 1]
52
52
53 def end(self, rev):
53 def end(self, rev):
54 return self._data[rev]
54 return self._data[rev]
55
55
56 def length(self, rev):
56 def length(self, rev):
57 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
58
58
59 def __len__(self):
59 def __len__(self):
60 return len(self._data)
60 return len(self._data)
61
61
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5, 10, 12, 12, 27, 31, 31, 42, 47, 47, 48, 51, 74, 85, 86, 91])
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densitygroups = _slicechunktodensity(revlog, revs,
                                         revlog._srdensitythreshold,
                                         revlog._srmingapsize)
    for densitychunk in densitygroups:
        for sizedchunk in _slicechunktosize(revlog, densitychunk, targetsize):
            yield sizedchunk
124
124
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> revlog = _testrevlog([3, 5, 6, 8, 8, 11, 12, 13, 14])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # small enough already (or no limit): emit the whole group unchanged
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    groupfirstidx = 0
    groupstartdata = revlog.start(revs[0])
    grouplastidx = 0
    walker = enumerate(revs)
    next(walker)  # skip first rev.
    for idx, rev in walker:
        span = revlog.end(rev) - groupstartdata
        if span <= targetsize:
            # still fits: extend the current group
            grouplastidx = idx
        else:
            # flush the current group (trimmed of trailing empty revs)
            # and start a new one at this revision
            pending = _trimchunk(revlog, revs, groupfirstidx, grouplastidx + 1)
            if pending:
                yield pending
            groupfirstidx = idx
            groupstartdata = revlog.start(rev)
            grouplastidx = idx
    yield _trimchunk(revlog, revs, groupfirstidx)
201
201
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, 10, 12, 12, 27, 31, 31, 42, 47, 47, 48, 51, 74, 85, 86, 91])
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # nothing to slice for zero or one revision
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)
    chainpayload = sum(length(r) for r in revs)

    # span already below the minimal gap size: keep it whole
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough as-is
    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    # (sizes are pushed negated so the min-heap pops the largest first)
    gapsheap = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        piece = _trimchunk(revlog, revs, previdx, idx)
        if piece:
            yield piece

        previdx = idx

    piece = _trimchunk(revlog, revs, previdx)
    if piece:
        yield piece
327
327
328 def _trimchunk(revlog, revs, startidx, endidx=None):
328 def _trimchunk(revlog, revs, startidx, endidx=None):
329 """returns revs[startidx:endidx] without empty trailing revs
329 """returns revs[startidx:endidx] without empty trailing revs
330
330
331 Doctest Setup
331 Doctest Setup
332 >>> revlog = _testrevlog([
332 >>> revlog = _testrevlog([
333 ... 5, #0
333 ... 5, #0
334 ... 10, #1
334 ... 10, #1
335 ... 12, #2
335 ... 12, #2
336 ... 12, #3 (empty)
336 ... 12, #3 (empty)
337 ... 17, #4
337 ... 17, #4
338 ... 21, #5
338 ... 21, #5
339 ... 21, #6 (empty)
339 ... 21, #6 (empty)
340 ... ])
340 ... ])
341
341
342 Contiguous cases:
342 Contiguous cases:
343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
344 [0, 1, 2, 3, 4, 5]
344 [0, 1, 2, 3, 4, 5]
345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
346 [0, 1, 2, 3, 4]
346 [0, 1, 2, 3, 4]
347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
348 [0, 1, 2]
348 [0, 1, 2]
349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
350 [2]
350 [2]
351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
352 [3, 4, 5]
352 [3, 4, 5]
353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
354 [3, 4]
354 [3, 4]
355
355
356 Discontiguous cases:
356 Discontiguous cases:
357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
358 [1, 3, 5]
358 [1, 3, 5]
359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
360 [1]
360 [1]
361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
362 [3, 5]
362 [3, 5]
363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
364 [3, 5]
364 [3, 5]
365 """
365 """
366 length = revlog.length
366 length = revlog.length
367
367
368 if endidx is None:
368 if endidx is None:
369 endidx = len(revs)
369 endidx = len(revs)
370
370
371 # If we have a non-emtpy delta candidate, there are nothing to trim
371 # If we have a non-emtpy delta candidate, there are nothing to trim
372 if revs[endidx - 1] < len(revlog):
372 if revs[endidx - 1] < len(revlog):
373 # Trim empty revs at the end, except the very first revision of a chain
373 # Trim empty revs at the end, except the very first revision of a chain
374 while (endidx > 1
374 while (endidx > 1
375 and endidx > startidx
375 and endidx > startidx
376 and length(revs[endidx - 1]) == 0):
376 and length(revs[endidx - 1]) == 0):
377 endidx -= 1
377 endidx -= 1
378
378
379 return revs[startidx:endidx]
379 return revs[startidx:endidx]
380
380
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    Returns the number of bytes between the start of the first revision
    and the end of the last one; gaps and empty revisions in between are
    included in the span.

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # empty segment spans no bytes
    if not revs:
        return 0
    # The previous `deltainfo=None` parameter was unused by all callers;
    # it is dropped so only the stored-revision span remains.
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    fullreplacement = mdiff.replacediffheader(revlog.rawsize(baserev),
                                              len(delta) - hlen)
    if delta[:hlen] == fullreplacement:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        fulltext, validatehash = revlog._processflags(fulltext, flags, 'read',
                                                      raw=True)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
444
438
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # distance from the delta base revision (read-span bound)
    distance = attr.ib()
    # size of the delta itself
    deltalen = attr.ib()
    # the delta payload
    data = attr.ib()
    # revision used as the delta base
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # depth if this is an intermediate snapshot, None otherwise
    snapshotdepth = attr.ib()
455
449
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exist to prevent endless chain of intermediate delta to be
    # created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True
524
518
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.

    This is a generator driven through ``send()``: after each yielded group,
    the caller sends back the revision it accepted as a good base (or None),
    which steers refinement. A final ``yield None`` signals exhaustion.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    # maximum total delta-chain data we accept for a text of this size
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < deltalength(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
578
572
579 def _findsnapshots(revlog, cache, start_rev):
573 def _findsnapshots(revlog, cache, start_rev):
580 """find snapshot from start_rev to tip"""
574 """find snapshot from start_rev to tip"""
581 deltaparent = revlog.deltaparent
575 deltaparent = revlog.deltaparent
582 issnapshot = revlog.issnapshot
576 issnapshot = revlog.issnapshot
583 for rev in revlog.revs(start_rev):
577 for rev in revlog.revs(start_rev):
584 if issnapshot(rev):
578 if issnapshot(rev):
585 cache[deltaparent(rev)].append(rev)
579 cache[deltaparent(rev)].append(rev)
586
580
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Top-level candidate generator, refining groups from feedback.

    A ``send()``-driven generator: each ``good = yield group`` receives the
    revision the caller accepted from that group (or None). It first tries
    the cached delta and the raw groups from _rawgroups; on sparse revlogs
    it then refines an accepted snapshot base down (towards its bases) and
    up (towards its snapshot children). Ends with ``yield None``.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse of a source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the accepted snapshot's delta parents,
        # one at a time, as long as the caller keeps accepting them
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshot children of the accepted base
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
637
631
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting theoretically interesting
    deltas without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `gdelta` branch above;
    # this assumes sparse revlogs imply general delta — confirm that invariant
    # holds for every caller.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds we
        # reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, child of a base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer chances
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed, try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
754
748
class deltacomputer(object):
    """Compute and select deltas for revisions stored in one revlog.

    ``finddeltainfo`` is the main entry point: it drives the candidate
    groups from ``_candidategroups`` and returns the best acceptable
    ``_deltainfo`` (or a full snapshot when no delta qualifies).
    """

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in ``revinfo.btext[0]``.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta text between `base` and revinfo's fulltext."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` describing revinfo delta'ed against `base`.

        Reuses the cached delta when its base (after skipping empty deltas)
        matches `base`; otherwise computes a fresh diff.
        """
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies: skip empty intermediate deltas
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the chain's base to the end of this delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            # (issnapshot(deltabase) already established by the elif above)
            if deltabase not in (p1, p2):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a ``_deltainfo`` storing revinfo as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            # empty revisions are always stored as full snapshots
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest delta found so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the generator can refine
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now