##// END OF EJS Templates
doctest: add an `issnapshot` method to _testrevlog...
Boris Feld -
r40677:85b14f0d default
parent child Browse files
Show More
@@ -1,895 +1,899 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46
47
47 def start(self, rev):
48 def start(self, rev):
48 if rev == 0:
49 if rev == 0:
49 return 0
50 return 0
50 return self._data[rev - 1]
51 return self._data[rev - 1]
51
52
52 def end(self, rev):
53 def end(self, rev):
53 return self._data[rev]
54 return self._data[rev]
54
55
55 def length(self, rev):
56 def length(self, rev):
56 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
57
58
58 def __len__(self):
59 def __len__(self):
59 return len(self._data)
60 return len(self._data)
60
61
62 def issnapshot(self, rev):
63 return rev in self._snapshot
64
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    density = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    if targetsize is not None:
        # never let the size cap drop below the minimum gap we skip
        targetsize = max(targetsize, mingap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # first slice by density, then re-slice each group to honor targetsize
    for densitychunk in _slicechunktodensity(revlog, revs, density, mingap):
        for sizedchunk in _slicechunktosize(revlog, densitychunk, targetsize):
            yield sizedchunk
123
127
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # fast path: the whole group already fits within the target size
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    # Grow a window [chunkstart .. lastfit] of revisions whose on-disk span
    # stays within targetsize; whenever the next revision would overflow the
    # window, flush it (trimmed of trailing empty revs) and start a new one.
    chunkstart = 0
    windowbase = revlog.start(revs[0])
    lastfit = 0
    revwalk = enumerate(revs)
    next(revwalk)  # the first rev always belongs to the current window
    for idx, rev in revwalk:
        if revlog.end(rev) - windowbase <= targetsize:
            lastfit = idx
        else:
            piece = _trimchunk(revlog, revs, chunkstart, lastfit + 1)
            if piece:
                yield piece
            chunkstart = idx
            windowbase = revlog.start(rev)
            lastfit = idx
    # flush whatever remains in the final window
    yield _trimchunk(revlog, revs, chunkstart)
200
204
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced further
    if len(revs) <= 1:
        yield revs
        return

    # total on-disk span from the first to the last revision of the group
    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size is not worth splitting
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    # total useful payload within the span
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # already dense enough: read it in one go
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps (unread holes between consecutive non-empty
    # revisions), sorted so the largest ones can be popped first
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # excluding this gap from the read shrinks the amount of data we
        # fetch, which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
326
330
327 def _trimchunk(revlog, revs, startidx, endidx=None):
331 def _trimchunk(revlog, revs, startidx, endidx=None):
328 """returns revs[startidx:endidx] without empty trailing revs
332 """returns revs[startidx:endidx] without empty trailing revs
329
333
330 Doctest Setup
334 Doctest Setup
331 >>> revlog = _testrevlog([
335 >>> revlog = _testrevlog([
332 ... 5, #0
336 ... 5, #0
333 ... 10, #1
337 ... 10, #1
334 ... 12, #2
338 ... 12, #2
335 ... 12, #3 (empty)
339 ... 12, #3 (empty)
336 ... 17, #4
340 ... 17, #4
337 ... 21, #5
341 ... 21, #5
338 ... 21, #6 (empty)
342 ... 21, #6 (empty)
339 ... ])
343 ... ])
340
344
341 Contiguous cases:
345 Contiguous cases:
342 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
343 [0, 1, 2, 3, 4, 5]
347 [0, 1, 2, 3, 4, 5]
344 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
345 [0, 1, 2, 3, 4]
349 [0, 1, 2, 3, 4]
346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
347 [0, 1, 2]
351 [0, 1, 2]
348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
349 [2]
353 [2]
350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
354 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
351 [3, 4, 5]
355 [3, 4, 5]
352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
356 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
353 [3, 4]
357 [3, 4]
354
358
355 Discontiguous cases:
359 Discontiguous cases:
356 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
360 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
357 [1, 3, 5]
361 [1, 3, 5]
358 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
362 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
359 [1]
363 [1]
360 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
364 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
361 [3, 5]
365 [3, 5]
362 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
366 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
363 [3, 5]
367 [3, 5]
364 """
368 """
365 length = revlog.length
369 length = revlog.length
366
370
367 if endidx is None:
371 if endidx is None:
368 endidx = len(revs)
372 endidx = len(revs)
369
373
370 # If we have a non-emtpy delta candidate, there are nothing to trim
374 # If we have a non-emtpy delta candidate, there are nothing to trim
371 if revs[endidx - 1] < len(revlog):
375 if revs[endidx - 1] < len(revlog):
372 # Trim empty revs at the end, except the very first revision of a chain
376 # Trim empty revs at the end, except the very first revision of a chain
373 while (endidx > 1
377 while (endidx > 1
374 and endidx > startidx
378 and endidx > startidx
375 and length(revs[endidx - 1]) == 0):
379 and length(revs[endidx - 1]) == 0):
376 endidx -= 1
380 endidx -= 1
377
381
378 return revs[startidx:endidx]
382 return revs[startidx:endidx]
379
383
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment reads no bytes; otherwise the span runs from the
    # start of the first revision to the end of the last one
    if revs:
        return revlog.end(revs[-1]) - revlog.start(revs[0])
    return 0
408
412
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    headerlen = struct.calcsize(">lll")
    fullreplace = mdiff.replacediffheader(revlog.rawsize(baserev),
                                          len(delta) - headerlen)
    if delta[:headerlen] == fullreplace:
        fulltext = delta[headerlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        fulltext, validatehash = revlog._processflags(fulltext, flags, 'read',
                                                      raw=True)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a censored flag on a revision that decoded cleanly is a mismatch
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
437
441
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # Immutable record describing a candidate delta for storage, with the
    # chain statistics precomputed for ``isgooddeltainfo``.
    distance = attr.ib()             # distance from the base revision (bytes)
    deltalen = attr.ib()             # size of this delta
    data = attr.ib()                 # the delta payload itself
    base = attr.ib()                 # revision the delta applies against
    chainbase = attr.ib()            # first revision of the delta chain
    chainlen = attr.ib()             # number of deltas in the chain
    compresseddeltalen = attr.ib()   # total size of all deltas in the chain
    snapshotdepth = attr.ib()        # snapshot depth, or None if not a snapshot
448
452
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # no candidate at all is never good
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    # an unset (falsy) _maxdeltachainspan disables the span bound; using the
    # candidate's own distance guarantees the comparison below passes
    maxdist = revlog._maxdeltachainspan or deltainfo.distance
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # A delta larger than the target text itself is never worth storing.
    if deltainfo.deltalen > textlen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of the chain's deltas must stay below K * target text length.
    if deltainfo.compresseddeltalen > textlen * LIMIT_DELTA2TEXT:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain (zero/unset disables the bound).
    if revlog._maxchainlen and deltainfo.chainlen > revlog._maxchainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # Each intermediate snapshot is capped at textlen / 2**depth; the limit
    # exists to prevent endless chains of intermediate deltas.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # bad delta if the new intermediate snapshot would be larger than the
    # previous snapshot it builds upon
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
517
521
518 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
522 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
519 """Provides group of revision to be tested as delta base
523 """Provides group of revision to be tested as delta base
520
524
521 This top level function focus on emitting groups with unique and worthwhile
525 This top level function focus on emitting groups with unique and worthwhile
522 content. See _raw_candidate_groups for details about the group order.
526 content. See _raw_candidate_groups for details about the group order.
523 """
527 """
524 # should we try to build a delta?
528 # should we try to build a delta?
525 if not (len(revlog) and revlog._storedeltachains):
529 if not (len(revlog) and revlog._storedeltachains):
526 yield None
530 yield None
527 return
531 return
528
532
529 deltalength = revlog.length
533 deltalength = revlog.length
530 deltaparent = revlog.deltaparent
534 deltaparent = revlog.deltaparent
531 good = None
535 good = None
532
536
533 deltas_limit = textlen * LIMIT_DELTA2TEXT
537 deltas_limit = textlen * LIMIT_DELTA2TEXT
534
538
535 tested = set([nullrev])
539 tested = set([nullrev])
536 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
540 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
537 while True:
541 while True:
538 temptative = candidates.send(good)
542 temptative = candidates.send(good)
539 if temptative is None:
543 if temptative is None:
540 break
544 break
541 group = []
545 group = []
542 for rev in temptative:
546 for rev in temptative:
543 # skip over empty delta (no need to include them in a chain)
547 # skip over empty delta (no need to include them in a chain)
544 while (revlog._generaldelta
548 while (revlog._generaldelta
545 and not (rev == nullrev
549 and not (rev == nullrev
546 or rev in tested
550 or rev in tested
547 or deltalength(rev))):
551 or deltalength(rev))):
548 tested.add(rev)
552 tested.add(rev)
549 rev = deltaparent(rev)
553 rev = deltaparent(rev)
550 # filter out revision we tested already
554 # filter out revision we tested already
551 if rev in tested:
555 if rev in tested:
552 continue
556 continue
553 tested.add(rev)
557 tested.add(rev)
554 # filter out delta base that will never produce good delta
558 # filter out delta base that will never produce good delta
555 if deltas_limit < revlog.length(rev):
559 if deltas_limit < revlog.length(rev):
556 continue
560 continue
557 # no need to try a delta against nullrev, this will be done as a
561 # no need to try a delta against nullrev, this will be done as a
558 # last resort.
562 # last resort.
559 if rev == nullrev:
563 if rev == nullrev:
560 continue
564 continue
561 # no delta for rawtext-changing revs (see "candelta" for why)
565 # no delta for rawtext-changing revs (see "candelta" for why)
562 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
566 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
563 continue
567 continue
564 group.append(rev)
568 group.append(rev)
565 if group:
569 if group:
566 # XXX: in the sparse revlog case, group can become large,
570 # XXX: in the sparse revlog case, group can become large,
567 # impacting performances. Some bounding or slicing mecanism
571 # impacting performances. Some bounding or slicing mecanism
568 # would help to reduce this impact.
572 # would help to reduce this impact.
569 good = yield tuple(group)
573 good = yield tuple(group)
570 yield None
574 yield None
571
575
572 def _findsnapshots(revlog, cache, start_rev):
576 def _findsnapshots(revlog, cache, start_rev):
573 """find snapshot from start_rev to tip"""
577 """find snapshot from start_rev to tip"""
574 deltaparent = revlog.deltaparent
578 deltaparent = revlog.deltaparent
575 issnapshot = revlog.issnapshot
579 issnapshot = revlog.issnapshot
576 for rev in revlog.revs(start_rev):
580 for rev in revlog.revs(start_rev):
577 if issnapshot(rev):
581 if issnapshot(rev):
578 cache[deltaparent(rev)].append(rev)
582 cache[deltaparent(rev)].append(rev)
579
583
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Generate refined groups of candidate delta bases.

    Coroutine-style generator: yields tuples of candidate base revisions and
    receives (via ``send``) the revision the caller found "good", or None.
    Terminates by yielding None.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the delta-parent chain toward the root,
        # offering each ancestor snapshot as a (possibly better) base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshots built on top of `good`
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
630
634
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting deltas theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `if gdelta:` branch
    # above; this assumes a sparse revlog always implies general delta —
    # confirm against the revlog configuration invariants.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed, try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
747
751
class deltacomputer(object):
    """Compute and select deltas for revisions added to a revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in ``revinfo.btext[0]``.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Compute the raw delta of ``revinfo`` against revision ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing a delta of ``revinfo`` against
        revision ``base``, reusing the cached delta when it still applies."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies: walk empty deltas from the
            # cached base toward `base`
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        # (a second `revlog = self.revlog` rebinding was removed here; the
        # local is already bound above)
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            # `issnapshot(deltabase)` already holds from the elif guard, so
            # only the parent check is needed here
            if deltabase not in (p1, p2):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a _deltainfo storing ``revinfo`` as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the candidate generator can
                # refine the search around it
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now