upstream/mercurial-mirror Commit - r39373:37957e07

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import heapq

12

import heapq

13

import struct

13

import struct

14

15

# import stuff from node for others to import from revlog

15

# import stuff from node for others to import from revlog

16

from ..node import (

16

from ..node import (

17

nullrev,

17

nullrev,

18

)

18

)

19

from ..i18n import _

19

from ..i18n import _

20

21

from .constants import (

21

from .constants import (

22

REVIDX_ISCENSORED,

22

REVIDX_ISCENSORED,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

)

24

)

25

26

from ..thirdparty import (

26

from ..thirdparty import (

27

attr,

27

attr,

28

)

28

)

29

30

from .. import (

30

from .. import (

31

error,

31

error,

32

mdiff,

32

mdiff,

33

)

33

)

34

35

# Local aliases for the exception types raised/caught in this module.
RevlogError = error.RevlogError
CensoredNodeError = error.CensoredNodeError

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2

40

41

class _testrevlog(object):
    """minimalist fake revlog to use in doctests

    Only the attributes and methods read by the slicing helpers of this
    module are provided: ``start``, ``end``, ``length``, ``__len__`` and the
    two sparse-read settings.
    """

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries

        ``data[rev]`` is the end offset of revision ``rev``; a revision
        starts where the previous one ends (revision 0 starts at offset 0).
        """
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        """return the offset at which the payload of ``rev`` starts"""
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        """return the offset at which the payload of ``rev`` ends"""
        return self._data[rev]

    def length(self, rev):
        """return the payload size of ``rev`` (0 for an empty revision)"""
        return self.end(rev) - self.start(rev)

    def __len__(self):
        """return the number of revisions in the fake revlog"""
        return len(self._data)

63

64

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    ``deltainfo`` and ``targetsize`` are mutually exclusive; passing both
    raises ``error.ProgrammingError``.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        # never slice finer than the minimal gap the density pass would skip
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    # first pass: cut on the largest gaps until the density is acceptable;
    # second pass: cut each resulting chunk down to the size limit.
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

131

132

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    ``targetsize`` is the maximum byte span of a yielded chunk; ``None``
    disables the limit. A single revision larger than ``targetsize`` is still
    yielded on its own. Empty chunks are never yielded.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    Trailing empty revisions never produce an empty chunk
    >>> list(_slicechunktosize(revlog, [3, 4], 1))
    [[3]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # ``r`` still fits in the chunk being built
            endrevidx = idx
        else:
            # ``r`` overflows the limit: emit what was accumulated so far
            # and start a new chunk at ``r``
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    # The remainder may consist solely of empty revisions, in which case
    # trimming leaves nothing; skip it like the other yield sites do.
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk

208

209

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    # revision number and start offset the candidate delta would get if it
    # were appended to the revlog
    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        # the whole span is smaller than any gap we would accept to skip
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        # account for the (non-empty) candidate delta as an extra revision;
        # copy first so the caller's sequence is left untouched
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            # the candidate delta is not stored in the revlog yet
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices (popped in increasing order)
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

352

353

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    ``startidx`` and ``endidx`` are indices into ``revs`` (``endidx`` is
    exclusive and defaults to ``len(revs)``). An empty selection, including
    an empty ``revs``, returns an empty slice.

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]

    Empty input:
    >>> _trimchunk(revlog, [], 0)
    []
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # Nothing selected: there is nothing to trim, and looking at
    # revs[endidx - 1] would either fail (empty ``revs``) or inspect an
    # unrelated revision.
    if endidx <= startidx:
        return revs[startidx:endidx]

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
                and endidx > startidx
                and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

405

406

def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    When ``deltainfo`` is given and the last entry of ``revs`` is not stored
    in ``revlog`` yet (its number is ``>= len(revlog)``), that entry is
    treated as a delta of ``deltainfo.deltalen`` bytes appended right after
    the current end of the revlog.

    An empty ``revs`` has a span of 0.

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        # the last revision is the delta candidate, not yet in the revlog
        if len(revs) == 1:
            return deltainfo.deltalen
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

440

441

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is forwarded to ``revlog.revision`` as ``_df`` when the base text
    has to be read. The resulting text is run through the revlog flag
    processors and, when they request it, checked against ``expectednode``
    with parents ``p1`` and ``p2``.

    Raises ``RevlogError`` when ``flags`` carries ``REVIDX_ISCENSORED`` but
    processing the text did not raise ``CensoredNodeError``, and re-raises
    ``CensoredNodeError`` when the text is censored but the flag is missing.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # flagged as censored, yet the checks above did not complain
            raise RevlogError(_('node %s is not censored') % expectednode)
    except CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

468

469

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing one candidate delta for a revision."""
    # distance from the base revision; bounding it limits the amount of I/O
    distance = attr.ib()
    # size of the candidate delta itself, as it would be appended to the
    # revlog
    deltalen = attr.ib()
    # NOTE(review): presumably the delta payload to store -- confirm
    data = attr.ib()
    # revision the delta applies to (``nullrev`` when there is none)
    base = attr.ib()
    # NOTE(review): presumably the first revision of the delta chain -- confirm
    chainbase = attr.ib()
    # NOTE(review): presumably the number of deltas in the chain -- confirm
    chainlen = attr.ib()
    # sum of the total size of deltas that need to be applied; bounding it
    # limits the amount of CPU consumed
    compresseddeltalen = attr.ib()
    # NOTE(review): meaning not visible from this part of the file -- confirm
    snapshotdepth = attr.ib()

479

480

def isgooddeltainfo(revlog, deltainfo, revinfo):

480

def isgooddeltainfo(revlog, deltainfo, revinfo):

481

"""Returns True if the given delta is good. Good means that it is within

481

"""Returns True if the given delta is good. Good means that it is within

482

the disk span, disk size, and chain length bounds that we know to be

482

the disk span, disk size, and chain length bounds that we know to be

483

performant."""

483

performant."""

484

if deltainfo is None:

484

if deltainfo is None:

485

return False

485

return False

486

487

# - 'deltainfo.distance' is the distance from the base revision --

487

# - 'deltainfo.distance' is the distance from the base revision --

488

# bounding it limits the amount of I/O we need to do.

488

# bounding it limits the amount of I/O we need to do.

489

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

489

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

490

# deltas we need to apply -- bounding it limits the amount of CPU

490

# deltas we need to apply -- bounding it limits the amount of CPU

491

# we consume.

491

# we consume.

492

493

if revlog._sparserevlog:

493

if revlog._sparserevlog:

494

# As sparse-read will be used, we can consider that the distance,

494

# As sparse-read will be used, we can consider that the distance,

495

# instead of being the span of the whole chunk,

495

# instead of being the span of the whole chunk,

496

# is the span of the largest read chunk

496

# is the span of the largest read chunk

497

base = deltainfo.base

497

base = deltainfo.base

498

499

if base != nullrev:

499

if base != nullrev:

500

deltachain = revlog._deltachain(base)[0]

500

deltachain = revlog._deltachain(base)[0]

501

else:

501

else:

502

deltachain = []

502

deltachain = []

503

504

# search for the first non-snapshot revision

504

# search for the first non-snapshot revision

505

for idx, r in enumerate(deltachain):

505

for idx, r in enumerate(deltachain):

506

if not revlog.issnapshot(r):

506

if not revlog.issnapshot(r):

507

break

507

break

508

deltachain = deltachain[idx:]

508

deltachain = deltachain[idx:]

509

chunks = slicechunk(revlog, deltachain, deltainfo)

509

chunks = slicechunk(revlog, deltachain, deltainfo)

510

all_span = [segmentspan(revlog, revs, deltainfo)

510

all_span = [segmentspan(revlog, revs, deltainfo)

511

for revs in chunks]

511

for revs in chunks]

512

distance = max(all_span)

512

distance = max(all_span)

513

else:

513

else:

514

distance = deltainfo.distance

514

distance = deltainfo.distance

515

516

textlen = revinfo.textlen

516

textlen = revinfo.textlen

517

defaultmax = textlen * 4

517

defaultmax = textlen * 4

518

maxdist = revlog._maxdeltachainspan

518

maxdist = revlog._maxdeltachainspan

519

if not maxdist:

519

if not maxdist:

520

maxdist = distance # ensure the conditional pass

520

maxdist = distance # ensure the conditional pass

521

maxdist = max(maxdist, defaultmax)

521

maxdist = max(maxdist, defaultmax)

522

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

522

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

523

# In multiple place, we are ignoring irrelevant data range below a

523

# In multiple place, we are ignoring irrelevant data range below a

524

# certain size. Be also apply this tradeoff here and relax span

524

# certain size. Be also apply this tradeoff here and relax span

525

# constraint for small enought content.

525

# constraint for small enought content.

526

maxdist = revlog._srmingapsize

526

maxdist = revlog._srmingapsize

527

528

# Bad delta from read span:

528

# Bad delta from read span:

529

#

529

#

530

# If the span of data read is larger than the maximum allowed.

530

# If the span of data read is larger than the maximum allowed.

531

if maxdist < distance:

531

if maxdist < distance:

532

return False

532

return False

533

534

# Bad delta from new delta size:

534

# Bad delta from new delta size:

535

#

535

#

536

# If the delta size is larger than the target text, storing the

536

# If the delta size is larger than the target text, storing the

537

# delta will be inefficient.

537

# delta will be inefficient.

538

if textlen < deltainfo.deltalen:

538

if textlen < deltainfo.deltalen:

539

return False

539

return False

540

541

# Bad delta from cumulated payload size:

541

# Bad delta from cumulated payload size:

542

#

542

#

543

# If the sum of delta get larger than K * target text length.

543

# If the sum of delta get larger than K * target text length.

544

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

544

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

545

return False

545

return False

546

547

# Bad delta from chain length:

547

# Bad delta from chain length:

548

#

548

#

549

# If the number of delta in the chain gets too high.

549

# If the number of delta in the chain gets too high.

550

if (revlog._maxchainlen

550

if (revlog._maxchainlen

551

and revlog._maxchainlen < deltainfo.chainlen):

551

and revlog._maxchainlen < deltainfo.chainlen):

552

return False

552

return False

553

554

# bad delta from intermediate snapshot size limit

554

# bad delta from intermediate snapshot size limit

555

#

555

#

556

# If an intermediate snapshot size is higher than the limit. The

556

# If an intermediate snapshot size is higher than the limit. The

557

# limit exist to prevent endless chain of intermediate delta to be

557

# limit exist to prevent endless chain of intermediate delta to be

558

# created.

558

# created.

559

if (deltainfo.snapshotdepth is not None and

559

if (deltainfo.snapshotdepth is not None and

560

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

560

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

561

return False

561

return False

562

563

# bad delta if new intermediate snapshot is larger than the previous

563

# bad delta if new intermediate snapshot is larger than the previous

564

# snapshot

564

# snapshot

565

if (deltainfo.snapshotdepth

565

if (deltainfo.snapshotdepth

566

and revlog.length(deltainfo.base) < deltainfo.deltalen):

566

and revlog.length(deltainfo.base) < deltainfo.deltalen):

567

return False

567

return False

568

569

return True

569

return True

570

571

def _candidategroups(revlog, p1, p2, cachedelta):

571

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

572

"""Provides group of revision to be tested as delta base

572

"""Provides group of revision to be tested as delta base

573

574

This top level function focus on emitting groups with unique and worthwhile

574

This top level function focus on emitting groups with unique and worthwhile

575

content. See _raw_candidate_groups for details about the group order.

575

content. See _raw_candidate_groups for details about the group order.

576

"""

576

"""

577

# should we try to build a delta?

577

# should we try to build a delta?

578

if not (len(revlog) and revlog._storedeltachains):

578

if not (len(revlog) and revlog._storedeltachains):

579

return

579

return

580

581

deltalength = revlog.length

582

deltaparent = revlog.deltaparent

583

584

deltas_limit = textlen * LIMIT_DELTA2TEXT

585

581

tested = set([nullrev])

586

tested = set([nullrev])

582

for ~~group~~ in _rawgroups(revlog, p1, p2, cachedelta):

587

for temptative in _rawgroups(revlog, p1, p2, cachedelta):

583

group = tuple(r for r in group if r not in tested)

588

group = []

584

tested.update(group)

589

for rev in temptative:

590

# skip over empty delta (no need to include them in a chain)

591

while not (rev == nullrev or rev in tested or deltalength(rev)):

592

rev = deltaparent(rev)

593

tested.add(rev)

594

# filter out revision we tested already

595

if rev in tested:

596

continue

597

tested.add(rev)

598

# filter out delta base that will never produce good delta

599

if deltas_limit < revlog.length(rev):

600

continue

601

# no need to try a delta against nullrev, this will be done as a

602

# last resort.

603

if rev == nullrev:

604

continue

605

# no delta for rawtext-changing revs (see "candelta" for why)

606

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

607

continue

608

group.append(rev)

585

if group:

609

if group:

586

yield group

610

yield tuple(group)

587

611

588

def _rawgroups(revlog, p1, p2, cachedelta):

612

def _rawgroups(revlog, p1, p2, cachedelta):

589

"""Provides group of revision to be tested as delta base

613

"""Provides group of revision to be tested as delta base

590

614

591

This lower level function focus on emitting delta theorically interresting

615

This lower level function focus on emitting delta theorically interresting

592

without looking it any practical details.

616

without looking it any practical details.

593

617

594

The group order aims at providing fast or small candidates first.

618

The group order aims at providing fast or small candidates first.

595

"""

619

"""

596

gdelta = revlog._generaldelta

620

gdelta = revlog._generaldelta

597

curr = len(revlog)

621

curr = len(revlog)

598

prev = curr - 1

622

prev = curr - 1

599

623

600

# should we try to build a delta?

624

# should we try to build a delta?

601

if prev != nullrev and revlog._storedeltachains:

625

if prev != nullrev and revlog._storedeltachains:

602

tested = set()

626

tested = set()

603

# This condition is true most of the time when processing

627

# This condition is true most of the time when processing

604

# changegroup data into a generaldelta repo. The only time it

628

# changegroup data into a generaldelta repo. The only time it

605

# isn't true is if this is the first revision in a delta chain

629

# isn't true is if this is the first revision in a delta chain

606

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

630

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

607

if cachedelta and gdelta and revlog._lazydeltabase:

631

if cachedelta and gdelta and revlog._lazydeltabase:

608

# Assume what we received from the server is a good choice

632

# Assume what we received from the server is a good choice

609

# build delta will reuse the cache

633

# build delta will reuse the cache

610

yield (cachedelta[0],)

634

yield (cachedelta[0],)

611

tested.add(cachedelta[0])

635

tested.add(cachedelta[0])

612

636

613

# This condition is true most of the time when processing

637

# This condition is true most of the time when processing

614

# changegroup data into a generaldelta repo. The only time it

638

# changegroup data into a generaldelta repo. The only time it

615

# isn't true is if this is the first revision in a delta chain

639

# isn't true is if this is the first revision in a delta chain

616

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

640

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

617

if cachedelta and gdelta and revlog._lazydeltabase:

641

if cachedelta and gdelta and revlog._lazydeltabase:

618

# Assume what we received from the server is a good choice

642

# Assume what we received from the server is a good choice

619

# build delta will reuse the cache

643

# build delta will reuse the cache

620

yield (cachedelta[0],)

644

yield (cachedelta[0],)

621

645

622

if gdelta:

646

if gdelta:

623

# exclude already lazy tested base if any

647

# exclude already lazy tested base if any

624

parents = [p for p in (p1, p2) if p != nullrev]

648

parents = [p for p in (p1, p2) if p != nullrev]

625

649

626

if not revlog._deltabothparents and len(parents) == 2:

650

if not revlog._deltabothparents and len(parents) == 2:

627

parents.sort()

651

parents.sort()

628

# To minimize the chance of having to build a fulltext,

652

# To minimize the chance of having to build a fulltext,

629

# pick first whichever parent is closest to us (max rev)

653

# pick first whichever parent is closest to us (max rev)

630

yield (parents[1],)

654

yield (parents[1],)

631

# then the other one (min rev) if the first did not fit

655

# then the other one (min rev) if the first did not fit

632

yield (parents[0],)

656

yield (parents[0],)

633

elif len(parents) > 0:

657

elif len(parents) > 0:

634

# Test all parents (1 or 2), and keep the best candidate

658

# Test all parents (1 or 2), and keep the best candidate

635

yield parents

659

yield parents

636

660

637

# other approach failed try against prev to hopefully save us a

661

# other approach failed try against prev to hopefully save us a

638

# fulltext.

662

# fulltext.

639

yield (prev,)

663

yield (prev,)

640

664

641

class deltacomputer(object):

665

class deltacomputer(object):

642

def __init__(self, revlog):

666

def __init__(self, revlog):

643

self.revlog = revlog

667

self.revlog = revlog

644

668

645

def buildtext(self, revinfo, fh):

669

def buildtext(self, revinfo, fh):

646

"""Builds a fulltext version of a revision

670

"""Builds a fulltext version of a revision

647

671

648

revinfo: _revisioninfo instance that contains all needed info

672

revinfo: _revisioninfo instance that contains all needed info

649

fh: file handle to either the .i or the .d revlog file,

673

fh: file handle to either the .i or the .d revlog file,

650

depending on whether it is inlined or not

674

depending on whether it is inlined or not

651

"""

675

"""

652

btext = revinfo.btext

676

btext = revinfo.btext

653

if btext[0] is not None:

677

if btext[0] is not None:

654

return btext[0]

678

return btext[0]

655

679

656

revlog = self.revlog

680

revlog = self.revlog

657

cachedelta = revinfo.cachedelta

681

cachedelta = revinfo.cachedelta

658

baserev = cachedelta[0]

682

baserev = cachedelta[0]

659

delta = cachedelta[1]

683

delta = cachedelta[1]

660

684

661

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

685

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

662

revinfo.p1, revinfo.p2,

686

revinfo.p1, revinfo.p2,

663

revinfo.flags, revinfo.node)

687

revinfo.flags, revinfo.node)

664

return fulltext

688

return fulltext

665

689

666

def _builddeltadiff(self, base, revinfo, fh):

690

def _builddeltadiff(self, base, revinfo, fh):

667

revlog = self.revlog

691

revlog = self.revlog

668

t = self.buildtext(revinfo, fh)

692

t = self.buildtext(revinfo, fh)

669

if revlog.iscensored(base):

693

if revlog.iscensored(base):

670

# deltas based on a censored revision must replace the

694

# deltas based on a censored revision must replace the

671

# full content in one patch, so delta works everywhere

695

# full content in one patch, so delta works everywhere

672

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

696

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

673

delta = header + t

697

delta = header + t

674

else:

698

else:

675

ptext = revlog.revision(base, _df=fh, raw=True)

699

ptext = revlog.revision(base, _df=fh, raw=True)

676

delta = mdiff.textdiff(ptext, t)

700

delta = mdiff.textdiff(ptext, t)

677

701

678

return delta

702

return delta

679

703

680

def _builddeltainfo(self, revinfo, base, fh):

704

def _builddeltainfo(self, revinfo, base, fh):

681

# can we use the cached delta?

705

# can we use the cached delta?

682

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

706

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

683

delta = revinfo.cachedelta[1]

707

delta = revinfo.cachedelta[1]

684

else:

708

else:

685

delta = self._builddeltadiff(base, revinfo, fh)

709

delta = self._builddeltadiff(base, revinfo, fh)

686

revlog = self.revlog

710

revlog = self.revlog

687

header, data = revlog.compress(delta)

711

header, data = revlog.compress(delta)

688

deltalen = len(header) + len(data)

712

deltalen = len(header) + len(data)

689

chainbase = revlog.chainbase(base)

713

chainbase = revlog.chainbase(base)

690

offset = revlog.end(len(revlog) - 1)

714

offset = revlog.end(len(revlog) - 1)

691

dist = deltalen + offset - revlog.start(chainbase)

715

dist = deltalen + offset - revlog.start(chainbase)

692

if revlog._generaldelta:

716

if revlog._generaldelta:

693

deltabase = base

717

deltabase = base

694

else:

718

else:

695

deltabase = chainbase

719

deltabase = chainbase

696

chainlen, compresseddeltalen = revlog._chaininfo(base)

720

chainlen, compresseddeltalen = revlog._chaininfo(base)

697

chainlen += 1

721

chainlen += 1

698

compresseddeltalen += deltalen

722

compresseddeltalen += deltalen

699

723

700

revlog = self.revlog

724

revlog = self.revlog

701

snapshotdepth = None

725

snapshotdepth = None

702

if deltabase == nullrev:

726

if deltabase == nullrev:

703

snapshotdepth = 0

727

snapshotdepth = 0

704

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

728

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

705

# A delta chain should always be one full snapshot,

729

# A delta chain should always be one full snapshot,

706

# zero or more semi-snapshots, and zero or more deltas

730

# zero or more semi-snapshots, and zero or more deltas

707

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

731

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

708

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

732

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

709

snapshotdepth = len(revlog._deltachain(deltabase)[0])

733

snapshotdepth = len(revlog._deltachain(deltabase)[0])

710

734

711

return _deltainfo(dist, deltalen, (header, data), deltabase,

735

return _deltainfo(dist, deltalen, (header, data), deltabase,

712

chainbase, chainlen, compresseddeltalen,

736

chainbase, chainlen, compresseddeltalen,

713

snapshotdepth)

737

snapshotdepth)

714

738

715

def _fullsnapshotinfo(self, fh, revinfo):

739

def _fullsnapshotinfo(self, fh, revinfo):

716

curr = len(self.revlog)

740

curr = len(self.revlog)

717

rawtext = self.buildtext(revinfo, fh)

741

rawtext = self.buildtext(revinfo, fh)

718

data = self.revlog.compress(rawtext)

742

data = self.revlog.compress(rawtext)

719

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

743

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

720

deltabase = chainbase = curr

744

deltabase = chainbase = curr

721

snapshotdepth = 0

745

snapshotdepth = 0

722

chainlen = 1

746

chainlen = 1

723

747

724

return _deltainfo(dist, deltalen, data, deltabase,

748

return _deltainfo(dist, deltalen, data, deltabase,

725

chainbase, chainlen, compresseddeltalen,

749

chainbase, chainlen, compresseddeltalen,

726

snapshotdepth)

750

snapshotdepth)

727

751

728

def finddeltainfo(self, revinfo, fh):

752

def finddeltainfo(self, revinfo, fh):

729

"""Find an acceptable delta against a candidate revision

753

"""Find an acceptable delta against a candidate revision

730

754

731

revinfo: information about the revision (instance of _revisioninfo)

755

revinfo: information about the revision (instance of _revisioninfo)

732

fh: file handle to either the .i or the .d revlog file,

756

fh: file handle to either the .i or the .d revlog file,

733

depending on whether it is inlined or not

757

depending on whether it is inlined or not

734

758

735

Returns the first acceptable candidate revision, as ordered by

759

Returns the first acceptable candidate revision, as ordered by

736

_candidategroups

760

_candidategroups

737

761

738

If no suitable deltabase is found, we return delta info for a full

762

If no suitable deltabase is found, we return delta info for a full

739

snapshot.

763

snapshot.

740

"""

764

"""

741

if not revinfo.textlen:

765

if not revinfo.textlen:

742

return self._fullsnapshotinfo(fh, revinfo)

766

return self._fullsnapshotinfo(fh, revinfo)

743

767

744

# no delta for flag processor revision (see "candelta" for why)

768

# no delta for flag processor revision (see "candelta" for why)

745

# not calling candelta since only one revision needs test, also to

769

# not calling candelta since only one revision needs test, also to

746

# avoid overhead fetching flags again.

770

# avoid overhead fetching flags again.

747

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

771

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

748

return self._fullsnapshotinfo(fh, revinfo)

772

return self._fullsnapshotinfo(fh, revinfo)

749

773

750

cachedelta = revinfo.cachedelta

774

cachedelta = revinfo.cachedelta

751

p1 = revinfo.p1

775

p1 = revinfo.p1

752

p2 = revinfo.p2

776

p2 = revinfo.p2

753

revlog = self.revlog

777

revlog = self.revlog

754

778

755

deltalength = self.revlog.length

756

deltaparent = self.revlog.deltaparent

757

758

deltainfo = None

779

deltainfo = None

759

deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT

760

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

780

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

761

groups = _candidategroups(self.revlog, p1r, ~~p2r~~, ~~cachedelta~~)

781

groups = _candidategroups(self.revlog, revinfo.textlen,

782

p1r, p2r, cachedelta)

762

for candidaterevs in groups:

783

for candidaterevs in groups:

763

# filter out delta base that will never produce good delta

764

candidaterevs = [r for r in candidaterevs

765

if self.revlog.length(r) <= deltas_limit]

766

nominateddeltas = []

784

nominateddeltas = []

767

for candidaterev in candidaterevs:

785

for candidaterev in candidaterevs:

768

# skip over empty delta (no need to include them in a chain)

769

while candidaterev != nullrev and not deltalength(candidaterev):

770

candidaterev = deltaparent(candidaterev)

771

# no need to try a delta against nullid, this will be handled

772

# by fulltext later.

773

if candidaterev == nullrev:

774

continue

775

# no delta for rawtext-changing revs (see "candelta" for why)

776

if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

777

continue

778

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

786

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

779

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

787

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

780

nominateddeltas.append(candidatedelta)

788

nominateddeltas.append(candidatedelta)

781

if nominateddeltas:

789

if nominateddeltas:

782

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

790

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

783

break

791

break

784

792

785

if deltainfo is None:

793

if deltainfo is None:

786

deltainfo = self._fullsnapshotinfo(fh, revinfo)

794

deltainfo = self._fullsnapshotinfo(fh, revinfo)

787

return deltainfo

795

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
             import heapq
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
             )
             RevlogError = error.RevlogError
             CensoredNodeError = error.CensoredNodeError
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                 def start(self, rev):
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
             def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.

                 ``revs`` is sliced into groups that should each be read in one go.
                 Assume that revs are sorted.

                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.

                 If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
                 For consistency with other slicing choices, this limit won't go lower
                 than `revlog._srmingapsize`.

                 If individual revision chunks are larger than this limit, they will
                 still be yielded individually.

                 `deltainfo` and `targetsize` are mutually exclusive: passing both makes
                 the generator raise `error.ProgrammingError` when it is first advanced.

                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])

                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]

                 Slicing with a maximum chunk size

                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 """
                 sizelimit = targetsize
                 if sizelimit is not None:
                     # never aim below the minimal gap size used by the density pass
                     sizelimit = max(sizelimit, revlog._srmingapsize)

                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when
                 #   reading,
                 # * deltainfo is used to pick a good delta chain when writing.
                 if deltainfo is not None and sizelimit is not None:
                     raise error.ProgrammingError(
                         'cannot use `targetsize` with a `deltainfo`')

                 density = revlog._srdensitythreshold
                 mingap = revlog._srmingapsize
                 densechunks = _slicechunktodensity(revlog, revs, deltainfo,
                                                    density, mingap)
                 # first pass slices by density, second pass caps each result by size
                 for densechunk in densechunks:
                     for piece in _slicechunktosize(revlog, densechunk, sizelimit):
                         yield piece
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> revlog = _testrevlog([
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ])
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 startdata = revlog.start(revs[0])
                 endrevidx = 0
                 iterrevs = enumerate(revs)
                 next(iterrevs) # skip first rev.
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     if span <= targetsize:
                         endrevidx = idx
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx
                 yield _trimchunk(revlog, revs, startrevidx)
             def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                                      mingapsize=0):
                 """Slice ``revs`` into groups dense enough to be read in one go.

                 ``revs`` is assumed to be sorted. Each yielded group is a slice of
                 ``revs`` meant to be read from disk in a single pass.

                 ``deltainfo`` is a _deltainfo instance of a revision that we would
                 append to the top of the revlog. When given, its ``distance`` and
                 ``compresseddeltalen`` are used as the span and payload of the chain.

                 The initial chunk is cut at its largest gaps, one after the other,
                 until the overall density (payload / span of data read) reaches
                 ``targetdensity``. Only gaps larger than ``mingapsize`` are skipped.

                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])

                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 offsetof = revlog.start
                 sizeof = revlog.length

                 # Zero or one revision: there is nothing to slice.
                 if len(revs) <= 1:
                     yield revs
                     return

                 nextrev = len(revlog)
                 nextoffset = revlog.end(nextrev - 1)

                 if deltainfo is not None:
                     span = deltainfo.distance
                     payload = deltainfo.compresseddeltalen
                 else:
                     span = segmentspan(revlog, revs)
                     payload = sum(sizeof(r) for r in revs)

                 # The whole span is smaller than the smallest gap we would skip.
                 if span < mingapsize:
                     yield revs
                     return

                 density = payload / float(span) if span else 1.0
                 if density >= targetdensity:
                     yield revs
                     return

                 if deltainfo is not None and deltainfo.deltalen:
                     # take the (non-empty) revision about to be appended into account
                     revs = list(revs)
                     revs.append(nextrev)

                 # Record every gap larger than mingapsize as (-size, index of the
                 # revision following the gap); sorting then puts the largest first.
                 gaps = []
                 lastend = None
                 for pos, rev in enumerate(revs):
                     if rev < nextrev:
                         begin = offsetof(rev)
                         size = sizeof(rev)
                     else:
                         begin = nextoffset
                         size = deltainfo.deltalen
                     # Skip empty revisions to form larger holes
                     if size == 0:
                         continue
                     if lastend is not None and begin - lastend > mingapsize:
                         gaps.append((lastend - begin, pos))
                     lastend = begin + size
                 gaps.sort()

                 # Drop the largest holes until the density is acceptable
                 cuts = []
                 toread = span
                 for neggap, pos in gaps:
                     if density >= targetdensity:
                         break
                     cuts.append(pos)
                     # gap sizes are stored negated, so adding shrinks the read size
                     toread += neggap
                     density = payload / float(toread) if toread > 0 else 1.0
                 cuts.sort()

                 # Cut the revs at the collected indices
                 begin = 0
                 for cut in cuts:
                     piece = _trimchunk(revlog, revs, begin, cut)
                     if piece:
                         yield piece
                     begin = cut
                 piece = _trimchunk(revlog, revs, begin)
                 if piece:
                     yield piece
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (endidx > 1
                             and endidx > startidx
                             and length(revs[endidx - 1]) == 0):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs, deltainfo=None):
                 """Get the byte span of a segment of revisions

                 ``revs`` is a sorted array of revision numbers.

                 ``deltainfo``, when given, describes a revision that is not stored in
                 ``revlog``; it is only used when the last item of ``revs`` is such a
                 revision (its number is >= ``len(revlog)``). Its ``deltalen`` is then
                 counted right after the end of the last stored revision.

                 Returns the number of bytes between the start of the first revision
                 and the end of the last one, or 0 when ``revs`` is empty.

                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])

                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 17
                 >>> segmentspan(revlog, [0, 4])
                 17
                 >>> segmentspan(revlog, [3, 4])
                 5
                 >>> segmentspan(revlog, [1, 2, 3,])
                 7
                 >>> segmentspan(revlog, [1, 3])
                 7
                 """
                 if not revs:
                     return 0

                 # Does the segment end with the revision described by deltainfo?
                 pending = deltainfo is not None and len(revlog) <= revs[-1]
                 if not pending:
                     return revlog.end(revs[-1]) - revlog.start(revs[0])

                 if len(revs) == 1:
                     # the segment is the pending revision alone
                     return deltainfo.deltalen

                 # the pending revision would be written after the current last one
                 tipend = revlog.end(len(revlog) - 1)
                 return deltainfo.deltalen + tipend - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """Rebuild the full text of a revision from a (base, delta) pair

                 ``delta`` is applied on revision ``baserev`` of ``revlog``, read
                 through ``fh``. The result goes through the 'read' flag processing of
                 the revlog for ``flags`` and, when that processing asks for it, its
                 hash is checked against ``expectednode`` with parents ``p1``/``p2``.

                 Raises RevlogError when ``flags`` carry REVIDX_ISCENSORED but no
                 CensoredNodeError was raised while processing. A CensoredNodeError is
                 only swallowed when ``flags`` carry REVIDX_ISCENSORED.
                 """
                 headersize = struct.calcsize(">lll")
                 replaceheader = mdiff.replacediffheader(revlog.rawsize(baserev),
                                                         len(delta) - headersize)
                 if delta[:headersize] != replaceheader:
                     # The delta base is the rawtext before it gets changed by flag
                     # processors, which is equivalent to the non-raw text.
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     text = mdiff.patch(basetext, delta)
                 else:
                     # The delta replaces the entire base, so the base does not have
                     # to be decoded. This neatly avoids censored bases, which throw
                     # when they are decoded.
                     text = delta[headersize:]

                 censored = flags & REVIDX_ISCENSORED
                 try:
                     text, validatehash = revlog._processflags(text, flags, 'read',
                                                               raw=True)
                     if validatehash:
                         revlog.checkhash(text, expectednode, p1=p1, p2=p2)
                     if censored:
                         raise RevlogError(_('node %s is not censored') % expectednode)
                 except CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not censored:
                         raise
                 return text
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 """Immutable description of one way of storing a revision."""
                 # span in bytes from the start of the chain base up to the end of the
                 # new delta (see deltacomputer._builddeltainfo)
                 distance = attr.ib()
                 # size of the compressed delta: len(header) + len(data)
                 deltalen = attr.ib()
                 # the compressed delta itself, as returned by revlog.compress()
                 data = attr.ib()
                 # revision the delta is stored against
                 base = attr.ib()
                 # first revision of the delta chain
                 chainbase = attr.ib()
                 # number of revisions in the delta chain, this one included
                 chainlen = attr.ib()
                 # cumulated compressed size of the delta chain, this delta included
                 compresseddeltalen = attr.ib()
                 # None when the delta is not a snapshot, 0 for a full snapshot,
                 # otherwise the length of the delta chain of the base
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant.

                 ``deltainfo`` is a _deltainfo instance; None is never good.
                 ``revinfo`` must provide ``textlen``, the length of the revision text
                 the delta would produce.
                 """
                 if deltainfo is None:
                     return False

                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.

                 if revlog._sparserevlog:
                     # As sparse-read will be used, we can consider that the distance,
                     # instead of being the span of the whole chunk,
                     # is the span of the largest read chunk
                     base = deltainfo.base

                     if base != nullrev:
                         deltachain = revlog._deltachain(base)[0]
                     else:
                         deltachain = []

                     # search for the first non-snapshot revision
                     #
                     # 'idx' is initialised so that the slice below stays valid when
                     # the chain is empty (base is nullrev) and the loop never runs.
                     idx = 0
                     for idx, r in enumerate(deltachain):
                         if not revlog.issnapshot(r):
                             break
                     deltachain = deltachain[idx:]
                     chunks = slicechunk(revlog, deltachain, deltainfo)
                     all_span = [segmentspan(revlog, revs, deltainfo)
                                 for revs in chunks]
                     # no chunk to read at all means a null read span
                     distance = max(all_span) if all_span else 0
                 else:
                     distance = deltainfo.distance

                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = distance # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
                     # In multiple places, we are ignoring irrelevant data ranges below
                     # a certain size. We also apply this tradeoff here and relax the
                     # span constraint for small enough content.
                     maxdist = revlog._srmingapsize

                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 if maxdist < distance:
                     return False

                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False

                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False

                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (revlog._maxchainlen
                         and revlog._maxchainlen < deltainfo.chainlen):
                     return False

                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (deltainfo.snapshotdepth is not None and
                         (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
                     return False

                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (deltainfo.snapshotdepth
                         and revlog.length(deltainfo.base) < deltainfo.deltalen):
                     return False

                 return True
-            def _candidategroups(revlog, p1, p2, cachedelta):
+            def _candidategroups(revlog, textlen, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _rawgroups for details about the group order.
                 Nothing is emitted when the revlog is empty or does not store delta
                 chains. Each emitted group is a non-empty tuple of revision numbers; a
                 given revision is emitted at most once.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     return
+                deltalength = revlog.length
+                deltaparent = revlog.deltaparent
+                deltas_limit = textlen * LIMIT_DELTA2TEXT
                 tested = set([nullrev])
-                for group in _rawgroups(revlog, p1, p2, cachedelta):
+                for temptative in _rawgroups(revlog, p1, p2, cachedelta):
-                    group = tuple(r for r in group if r not in tested)
+                    group = []
-                    tested.update(group)
+                    for rev in temptative:
+                        # skip over empty delta (no need to include them in a chain)
+                        #
+                        # The empty revision is recorded as tested *before* moving to
+                        # its delta parent, otherwise the parent we end up on would
+                        # always be discarded by the "already tested" check below.
+                        while not (rev == nullrev or rev in tested or deltalength(rev)):
+                            tested.add(rev)
+                            rev = deltaparent(rev)
+                        # filter out revision we tested already
+                        if rev in tested:
+                            continue
+                        tested.add(rev)
+                        # filter out delta base that will never produce good delta
+                        if deltas_limit < revlog.length(rev):
+                            continue
+                        # no need to try a delta against nullrev, this will be done as a
+                        # last resort.
+                        if rev == nullrev:
+                            continue
+                        # no delta for rawtext-changing revs (see "candelta" for why)
+                        if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
+                            continue
+                        group.append(rev)
                     if group:
-                        yield group
+                        yield tuple(group)
             def _rawgroups(revlog, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 gdelta = revlog._generaldelta
                 curr = len(revlog)
                 prev = curr - 1
                 # should we try to build a delta?
                 if prev != nullrev and revlog._storedeltachains:
                     tested = set()
                     # This condition is true most of the time when processing
                     # changegroup data into a generaldelta repo. The only time it
                     # isn't true is if this is the first revision in a delta chain
                     # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                     if cachedelta and gdelta and revlog._lazydeltabase:
                         # Assume what we received from the server is a good choice
                         # build delta will reuse the cache
                         yield (cachedelta[0],)
                         tested.add(cachedelta[0])
                 # This condition is true most of the time when processing
                 # changegroup data into a generaldelta repo. The only time it
                 # isn't true is if this is the first revision in a delta chain
                 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                 if cachedelta and gdelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     yield (cachedelta[0],)
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 # other approach failed try against prev to hopefully save us a
                 # fulltext.
                 yield (prev,)
             class deltacomputer(object):
                 def __init__(self, revlog):
                     """Remember the revlog that deltas are computed for."""
                     self.revlog = revlog
                 def buildtext(self, revinfo, fh):
                     """Return the full text of a revision, building it if needed

                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not

                     The text is cached in ``revinfo.btext[0]``. When that slot is
                     empty, the text is rebuilt from ``revinfo.cachedelta`` (a base
                     revision and a delta) and stored there.
                     """
                     cache = revinfo.btext
                     text = cache[0]
                     if text is None:
                         cachedelta = revinfo.cachedelta
                         text = cache[0] = _textfromdelta(fh, self.revlog,
                                                          cachedelta[0], cachedelta[1],
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return text
                 def _builddeltadiff(self, base, revinfo, fh):
                     """Return the uncompressed delta turning ``base`` into ``revinfo``

                     ``base`` is a revision number of the revlog and ``fh`` the file
                     handle used to read it.
                     """
                     revlog = self.revlog
                     newtext = self.buildtext(revinfo, fh)
                     if not revlog.iscensored(base):
                         basetext = revlog.revision(base, _df=fh, raw=True)
                         return mdiff.textdiff(basetext, newtext)
                     # deltas based on a censored revision must replace the
                     # full content in one patch, so delta works everywhere
                     header = mdiff.replacediffheader(revlog.rawsize(base),
                                                      len(newtext))
                     return header + newtext
                 def _builddeltainfo(self, revinfo, base, fh):
                     """Build the _deltainfo of a delta of ``revinfo`` against ``base``

                     The delta cached in ``revinfo.cachedelta`` is reused when it was
                     computed against ``base``, otherwise a new one is computed using
                     ``fh`` to read the revlog. The delta is compressed and described
                     together with the delta chain it would extend.
                     """
                     revlog = self.revlog

                     # can we use the cached delta?
                     if revinfo.cachedelta and revinfo.cachedelta[0] == base:
                         delta = revinfo.cachedelta[1]
                     else:
                         delta = self._builddeltadiff(base, revinfo, fh)

                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     chainbase = revlog.chainbase(base)
                     # the new delta would be written at the current end of the revlog
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase

                     # extend the chain information of the base with the new delta
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen

                     snapshotdepth = None
                     if deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])

                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def _fullsnapshotinfo(self, fh, revinfo):
                     """Build the _deltainfo storing ``revinfo`` as a full snapshot

                     The full text is compressed as is. The resulting entry is its own
                     base and chain base (the next revision number of the revlog), has
                     a chain length of 1 and a snapshot depth of 0.
                     """
                     nextrev = len(self.revlog)
                     fulltext = self.buildtext(revinfo, fh)
                     compressed = self.revlog.compress(fulltext)
                     # distance, delta length and chain payload are all the same size
                     size = len(compressed[1]) + len(compressed[0])
                     return _deltainfo(size, size, compressed, nextrev,
                                       nextrev, 1, size,
                                       0)
                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     The returned value is a _deltainfo. Candidate groups are tried in
                     order; inside a group every candidate is built and checked with
                     isgooddeltainfo, and the good delta with the smallest deltalen is
                     kept. The search stops at the first group providing a good delta.
                     """
                     # a revision without text is stored as a full snapshot directly
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
-                    deltalength = self.revlog.length
-                    deltaparent = self.revlog.deltaparent
                     deltainfo = None
-                    deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
                     # _candidategroups works on revision numbers, not on nodes
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
-                    groups = _candidategroups(self.revlog, p1r, p2r, cachedelta)
+                    groups = _candidategroups(self.revlog, revinfo.textlen,
+                                                         p1r, p2r, cachedelta)
                     for candidaterevs in groups:
-                        # filter out delta base that will never produce good delta
-                        candidaterevs = [r for r in candidaterevs
-                                         if self.revlog.length(r) <= deltas_limit]
                         nominateddeltas = []
                         for candidaterev in candidaterevs:
-                            # skip over empty delta (no need to include them in a chain)
-                            while candidaterev != nullrev and not deltalength(candidaterev):
-                                candidaterev = deltaparent(candidaterev)
-                            # no need to try a delta against nullid, this will be handled
-                            # by fulltext later.
-                            if candidaterev == nullrev:
-                                continue
-                            # no delta for rawtext-changing revs (see "candelta" for why)
-                            if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
-                                continue
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                 nominateddeltas.append(candidatedelta)
                         # keep the smallest good delta of this group and stop searching
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                             break
                     # no candidate produced a good delta: fall back to a full snapshot
                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo