upstream/mercurial-mirror Commit - r39631:a911932d

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import collections

12

import collections

13

import heapq

13

import heapq

14

import struct

14

import struct

15

16

# import stuff from node for others to import from revlog

16

# import stuff from node for others to import from revlog

17

from ..node import (

17

from ..node import (

18

nullrev,

18

nullrev,

19

)

19

)

20

from ..i18n import _

20

from ..i18n import _

21

22

from .constants import (

22

from .constants import (

23

REVIDX_ISCENSORED,

23

REVIDX_ISCENSORED,

24

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

REVIDX_RAWTEXT_CHANGING_FLAGS,

25

)

25

)

26

27

from ..thirdparty import (

27

from ..thirdparty import (

28

attr,

28

attr,

29

)

29

)

30

31

from .. import (

31

from .. import (

32

error,

32

error,

33

mdiff,

33

mdiff,

34

)

34

)

35

36

RevlogError = error.RevlogError

36

RevlogError = error.RevlogError

37

CensoredNodeError = error.CensoredNodeError

37

CensoredNodeError = error.CensoredNodeError

38

39

# maximum <delta-chain-data>/<revision-text-length> ratio

39

# maximum <delta-chain-data>/<revision-text-length> ratio

40

LIMIT_DELTA2TEXT = 2

40

LIMIT_DELTA2TEXT = 2

41

42

class _testrevlog(object):

42

class _testrevlog(object):

43

"""minimalist fake revlog to use in doctests"""

43

"""minimalist fake revlog to use in doctests"""

44

45

def __init__(self, data, density=0.5, mingap=0):

45

def __init__(self, data, density=0.5, mingap=0):

46

"""data is an list of revision payload boundaries"""

46

"""data is an list of revision payload boundaries"""

47

self._data = data

47

self._data = data

48

self._srdensitythreshold = density

48

self._srdensitythreshold = density

49

self._srmingapsize = mingap

49

self._srmingapsize = mingap

50

51

def start(self, rev):

51

def start(self, rev):

52

if rev == 0:

52

if rev == 0:

53

return 0

53

return 0

54

return self._data[rev - 1]

54

return self._data[rev - 1]

55

56

def end(self, rev):

56

def end(self, rev):

57

return self._data[rev]

57

return self._data[rev]

58

59

def length(self, rev):

59

def length(self, rev):

60

return self.end(rev) - self.start(rev)

60

return self.end(rev) - self.start(rev)

61

62

def __len__(self):

62

def __len__(self):

63

return len(self._data)

63

return len(self._data)

64

65

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):

65

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):

66

"""slice revs to reduce the amount of unrelated data to be read from disk.

66

"""slice revs to reduce the amount of unrelated data to be read from disk.

67

68

``revs`` is sliced into groups that should be read in one time.

68

``revs`` is sliced into groups that should be read in one time.

69

Assume that revs are sorted.

69

Assume that revs are sorted.

70

71

The initial chunk is sliced until the overall density (payload/chunks-span

71

The initial chunk is sliced until the overall density (payload/chunks-span

72

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

72

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

73

`revlog._srmingapsize` is skipped.

73

`revlog._srmingapsize` is skipped.

74

75

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

75

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

76

For consistency with other slicing choice, this limit won't go lower than

76

For consistency with other slicing choice, this limit won't go lower than

77

`revlog._srmingapsize`.

77

`revlog._srmingapsize`.

78

79

If individual revisions chunk are larger than this limit, they will still

79

If individual revisions chunk are larger than this limit, they will still

80

be raised individually.

80

be raised individually.

81

82

>>> revlog = _testrevlog([

82

>>> revlog = _testrevlog([

83

... 5, #00 (5)

83

... 5, #00 (5)

84

... 10, #01 (5)

84

... 10, #01 (5)

85

... 12, #02 (2)

85

... 12, #02 (2)

86

... 12, #03 (empty)

86

... 12, #03 (empty)

87

... 27, #04 (15)

87

... 27, #04 (15)

88

... 31, #05 (4)

88

... 31, #05 (4)

89

... 31, #06 (empty)

89

... 31, #06 (empty)

90

... 42, #07 (11)

90

... 42, #07 (11)

91

... 47, #08 (5)

91

... 47, #08 (5)

92

... 47, #09 (empty)

92

... 47, #09 (empty)

93

... 48, #10 (1)

93

... 48, #10 (1)

94

... 51, #11 (3)

94

... 51, #11 (3)

95

... 74, #12 (23)

95

... 74, #12 (23)

96

... 85, #13 (11)

96

... 85, #13 (11)

97

... 86, #14 (1)

97

... 86, #14 (1)

98

... 91, #15 (5)

98

... 91, #15 (5)

99

... ])

99

... ])

100

101

>>> list(slicechunk(revlog, list(range(16))))

101

>>> list(slicechunk(revlog, list(range(16))))

102

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

102

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

103

>>> list(slicechunk(revlog, [0, 15]))

103

>>> list(slicechunk(revlog, [0, 15]))

104

[[0], [15]]

104

[[0], [15]]

105

>>> list(slicechunk(revlog, [0, 11, 15]))

105

>>> list(slicechunk(revlog, [0, 11, 15]))

106

[[0], [11], [15]]

106

[[0], [11], [15]]

107

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

107

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

108

[[0], [11, 13, 15]]

108

[[0], [11, 13, 15]]

109

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

109

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

110

[[1, 2], [5, 8, 10, 11], [14]]

110

[[1, 2], [5, 8, 10, 11], [14]]

111

112

Slicing with a maximum chunk size

112

Slicing with a maximum chunk size

113

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

113

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

114

[[0], [11], [13], [15]]

114

[[0], [11], [13], [15]]

115

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

115

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

116

[[0], [11], [13, 15]]

116

[[0], [11], [13, 15]]

117

"""

117

"""

118

if targetsize is not None:

118

if targetsize is not None:

119

targetsize = max(targetsize, revlog._srmingapsize)

119

targetsize = max(targetsize, revlog._srmingapsize)

120

# targetsize should not be specified when evaluating delta candidates:

120

# targetsize should not be specified when evaluating delta candidates:

121

# * targetsize is used to ensure we stay within specification when reading,

121

# * targetsize is used to ensure we stay within specification when reading,

122

# * deltainfo is used to pick are good delta chain when writing.

122

# * deltainfo is used to pick are good delta chain when writing.

123

if not (deltainfo is None or targetsize is None):

123

if not (deltainfo is None or targetsize is None):

124

msg = 'cannot use `targetsize` with a `deltainfo`'

124

msg = 'cannot use `targetsize` with a `deltainfo`'

125

raise error.ProgrammingError(msg)

125

raise error.ProgrammingError(msg)

126

for chunk in _slicechunktodensity(revlog, revs,

126

for chunk in _slicechunktodensity(revlog, revs,

127

deltainfo,

127

deltainfo,

128

revlog._srdensitythreshold,

128

revlog._srdensitythreshold,

129

revlog._srmingapsize):

129

revlog._srmingapsize):

130

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

130

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

131

yield subchunk

131

yield subchunk

132

133

def _slicechunktosize(revlog, revs, targetsize=None):

133

def _slicechunktosize(revlog, revs, targetsize=None):

134

"""slice revs to match the target size

134

"""slice revs to match the target size

135

136

This is intended to be used on chunk that density slicing selected by that

136

This is intended to be used on chunk that density slicing selected by that

137

are still too large compared to the read garantee of revlog. This might

137

are still too large compared to the read garantee of revlog. This might

138

happens when "minimal gap size" interrupted the slicing or when chain are

138

happens when "minimal gap size" interrupted the slicing or when chain are

139

built in a way that create large blocks next to each other.

139

built in a way that create large blocks next to each other.

140

141

>>> revlog = _testrevlog([

141

>>> revlog = _testrevlog([

142

... 3, #0 (3)

142

... 3, #0 (3)

143

... 5, #1 (2)

143

... 5, #1 (2)

144

... 6, #2 (1)

144

... 6, #2 (1)

145

... 8, #3 (2)

145

... 8, #3 (2)

146

... 8, #4 (empty)

146

... 8, #4 (empty)

147

... 11, #5 (3)

147

... 11, #5 (3)

148

... 12, #6 (1)

148

... 12, #6 (1)

149

... 13, #7 (1)

149

... 13, #7 (1)

150

... 14, #8 (1)

150

... 14, #8 (1)

151

... ])

151

... ])

152

153

Cases where chunk is already small enough

153

Cases where chunk is already small enough

154

>>> list(_slicechunktosize(revlog, [0], 3))

154

>>> list(_slicechunktosize(revlog, [0], 3))

155

[[0]]

155

[[0]]

156

>>> list(_slicechunktosize(revlog, [6, 7], 3))

156

>>> list(_slicechunktosize(revlog, [6, 7], 3))

157

[[6, 7]]

157

[[6, 7]]

158

>>> list(_slicechunktosize(revlog, [0], None))

158

>>> list(_slicechunktosize(revlog, [0], None))

159

[[0]]

159

[[0]]

160

>>> list(_slicechunktosize(revlog, [6, 7], None))

160

>>> list(_slicechunktosize(revlog, [6, 7], None))

161

[[6, 7]]

161

[[6, 7]]

162

163

cases where we need actual slicing

163

cases where we need actual slicing

164

>>> list(_slicechunktosize(revlog, [0, 1], 3))

164

>>> list(_slicechunktosize(revlog, [0, 1], 3))

165

[[0], [1]]

165

[[0], [1]]

166

>>> list(_slicechunktosize(revlog, [1, 3], 3))

166

>>> list(_slicechunktosize(revlog, [1, 3], 3))

167

[[1], [3]]

167

[[1], [3]]

168

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

168

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

169

[[1, 2], [3]]

169

[[1, 2], [3]]

170

>>> list(_slicechunktosize(revlog, [3, 5], 3))

170

>>> list(_slicechunktosize(revlog, [3, 5], 3))

171

[[3], [5]]

171

[[3], [5]]

172

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

172

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

173

[[3], [5]]

173

[[3], [5]]

174

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

174

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

175

[[5], [6, 7, 8]]

175

[[5], [6, 7, 8]]

176

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

176

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

177

[[0], [1, 2], [3], [5], [6, 7, 8]]

177

[[0], [1, 2], [3], [5], [6, 7, 8]]

178

179

Case with too large individual chunk (must return valid chunk)

179

Case with too large individual chunk (must return valid chunk)

180

>>> list(_slicechunktosize(revlog, [0, 1], 2))

180

>>> list(_slicechunktosize(revlog, [0, 1], 2))

181

[[0], [1]]

181

[[0], [1]]

182

>>> list(_slicechunktosize(revlog, [1, 3], 1))

182

>>> list(_slicechunktosize(revlog, [1, 3], 1))

183

[[1], [3]]

183

[[1], [3]]

184

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

184

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

185

[[3], [5]]

185

[[3], [5]]

186

"""

186

"""

187

assert targetsize is None or 0 <= targetsize

187

assert targetsize is None or 0 <= targetsize

188

if targetsize is None or segmentspan(revlog, revs) <= targetsize:

188

if targetsize is None or segmentspan(revlog, revs) <= targetsize:

189

yield revs

189

yield revs

190

return

190

return

191

192

startrevidx = 0

192

startrevidx = 0

193

startdata = revlog.start(revs[0])

193

startdata = revlog.start(revs[0])

194

endrevidx = 0

194

endrevidx = 0

195

iterrevs = enumerate(revs)

195

iterrevs = enumerate(revs)

196

next(iterrevs) # skip first rev.

196

next(iterrevs) # skip first rev.

197

for idx, r in iterrevs:

197

for idx, r in iterrevs:

198

span = revlog.end(r) - startdata

198

span = revlog.end(r) - startdata

199

if span <= targetsize:

199

if span <= targetsize:

200

endrevidx = idx

200

endrevidx = idx

201

else:

201

else:

202

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)

202

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)

203

if chunk:

203

if chunk:

204

yield chunk

204

yield chunk

205

startrevidx = idx

205

startrevidx = idx

206

startdata = revlog.start(r)

206

startdata = revlog.start(r)

207

endrevidx = idx

207

endrevidx = idx

208

yield _trimchunk(revlog, revs, startrevidx)

208

yield _trimchunk(revlog, revs, startrevidx)

209

210

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,

210

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,

211

mingapsize=0):

211

mingapsize=0):

212

"""slice revs to reduce the amount of unrelated data to be read from disk.

212

"""slice revs to reduce the amount of unrelated data to be read from disk.

213

214

``revs`` is sliced into groups that should be read in one time.

214

``revs`` is sliced into groups that should be read in one time.

215

Assume that revs are sorted.

215

Assume that revs are sorted.

216

217

``deltainfo`` is a _deltainfo instance of a revision that we would append

217

``deltainfo`` is a _deltainfo instance of a revision that we would append

218

to the top of the revlog.

218

to the top of the revlog.

219

220

The initial chunk is sliced until the overall density (payload/chunks-span

220

The initial chunk is sliced until the overall density (payload/chunks-span

221

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

221

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

222

skipped.

222

skipped.

223

224

>>> revlog = _testrevlog([

224

>>> revlog = _testrevlog([

225

... 5, #00 (5)

225

... 5, #00 (5)

226

... 10, #01 (5)

226

... 10, #01 (5)

227

... 12, #02 (2)

227

... 12, #02 (2)

228

... 12, #03 (empty)

228

... 12, #03 (empty)

229

... 27, #04 (15)

229

... 27, #04 (15)

230

... 31, #05 (4)

230

... 31, #05 (4)

231

... 31, #06 (empty)

231

... 31, #06 (empty)

232

... 42, #07 (11)

232

... 42, #07 (11)

233

... 47, #08 (5)

233

... 47, #08 (5)

234

... 47, #09 (empty)

234

... 47, #09 (empty)

235

... 48, #10 (1)

235

... 48, #10 (1)

236

... 51, #11 (3)

236

... 51, #11 (3)

237

... 74, #12 (23)

237

... 74, #12 (23)

238

... 85, #13 (11)

238

... 85, #13 (11)

239

... 86, #14 (1)

239

... 86, #14 (1)

240

... 91, #15 (5)

240

... 91, #15 (5)

241

... ])

241

... ])

242

243

>>> list(_slicechunktodensity(revlog, list(range(16))))

243

>>> list(_slicechunktodensity(revlog, list(range(16))))

244

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

244

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

245

>>> list(_slicechunktodensity(revlog, [0, 15]))

245

>>> list(_slicechunktodensity(revlog, [0, 15]))

246

[[0], [15]]

246

[[0], [15]]

247

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

247

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

248

[[0], [11], [15]]

248

[[0], [11], [15]]

249

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

249

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

250

[[0], [11, 13, 15]]

250

[[0], [11, 13, 15]]

251

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

251

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

252

[[1, 2], [5, 8, 10, 11], [14]]

252

[[1, 2], [5, 8, 10, 11], [14]]

253

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

253

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

254

... mingapsize=20))

254

... mingapsize=20))

255

[[1, 2, 3, 5, 8, 10, 11], [14]]

255

[[1, 2, 3, 5, 8, 10, 11], [14]]

256

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

256

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

257

... targetdensity=0.95))

257

... targetdensity=0.95))

258

[[1, 2], [5], [8, 10, 11], [14]]

258

[[1, 2], [5], [8, 10, 11], [14]]

259

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

259

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

260

... targetdensity=0.95, mingapsize=12))

260

... targetdensity=0.95, mingapsize=12))

261

[[1, 2], [5, 8, 10, 11], [14]]

261

[[1, 2], [5, 8, 10, 11], [14]]

262

"""

262

"""

263

start = revlog.start

263

start = revlog.start

264

length = revlog.length

264

length = revlog.length

265

266

if len(revs) <= 1:

266

if len(revs) <= 1:

267

yield revs

267

yield revs

268

return

268

return

269

270

nextrev = len(revlog)

270

nextrev = len(revlog)

271

nextoffset = revlog.end(nextrev - 1)

271

nextoffset = revlog.end(nextrev - 1)

272

273

if deltainfo is None:

273

if deltainfo is None:

274

deltachainspan = segmentspan(revlog, revs)

274

deltachainspan = segmentspan(revlog, revs)

275

chainpayload = sum(length(r) for r in revs)

275

chainpayload = sum(length(r) for r in revs)

276

else:

276

else:

277

deltachainspan = deltainfo.distance

277

deltachainspan = deltainfo.distance

278

chainpayload = deltainfo.compresseddeltalen

278

chainpayload = deltainfo.compresseddeltalen

279

280

if deltachainspan < mingapsize:

280

if deltachainspan < mingapsize:

281

yield revs

281

yield revs

282

return

282

return

283

284

readdata = deltachainspan

284

readdata = deltachainspan

285

286

if deltachainspan:

286

if deltachainspan:

287

density = chainpayload / float(deltachainspan)

287

density = chainpayload / float(deltachainspan)

288

else:

288

else:

289

density = 1.0

289

density = 1.0

290

291

if density >= targetdensity:

291

if density >= targetdensity:

292

yield revs

292

yield revs

293

return

293

return

294

295

if deltainfo is not None and deltainfo.deltalen:

295

if deltainfo is not None and deltainfo.deltalen:

296

revs = list(revs)

296

revs = list(revs)

297

revs.append(nextrev)

297

revs.append(nextrev)

298

299

# Store the gaps in a heap to have them sorted by decreasing size

299

# Store the gaps in a heap to have them sorted by decreasing size

300

gapsheap = []

300

gapsheap = []

301

heapq.heapify(gapsheap)

301

heapq.heapify(gapsheap)

302

prevend = None

302

prevend = None

303

for i, rev in enumerate(revs):

303

for i, rev in enumerate(revs):

304

if rev < nextrev:

304

if rev < nextrev:

305

revstart = start(rev)

305

revstart = start(rev)

306

revlen = length(rev)

306

revlen = length(rev)

307

else:

307

else:

308

revstart = nextoffset

308

revstart = nextoffset

309

revlen = deltainfo.deltalen

309

revlen = deltainfo.deltalen

310

311

# Skip empty revisions to form larger holes

311

# Skip empty revisions to form larger holes

312

if revlen == 0:

312

if revlen == 0:

313

continue

313

continue

314

315

if prevend is not None:

315

if prevend is not None:

316

gapsize = revstart - prevend

316

gapsize = revstart - prevend

317

# only consider holes that are large enough

317

# only consider holes that are large enough

318

if gapsize > mingapsize:

318

if gapsize > mingapsize:

319

heapq.heappush(gapsheap, (-gapsize, i))

319

heapq.heappush(gapsheap, (-gapsize, i))

320

321

prevend = revstart + revlen

321

prevend = revstart + revlen

322

323

# Collect the indices of the largest holes until the density is acceptable

323

# Collect the indices of the largest holes until the density is acceptable

324

indicesheap = []

324

indicesheap = []

325

heapq.heapify(indicesheap)

325

heapq.heapify(indicesheap)

326

while gapsheap and density < targetdensity:

326

while gapsheap and density < targetdensity:

327

oppgapsize, gapidx = heapq.heappop(gapsheap)

327

oppgapsize, gapidx = heapq.heappop(gapsheap)

328

329

heapq.heappush(indicesheap, gapidx)

329

heapq.heappush(indicesheap, gapidx)

330

331

# the gap sizes are stored as negatives to be sorted decreasingly

331

# the gap sizes are stored as negatives to be sorted decreasingly

332

# by the heap

332

# by the heap

333

readdata -= (-oppgapsize)

333

readdata -= (-oppgapsize)

334

if readdata > 0:

334

if readdata > 0:

335

density = chainpayload / float(readdata)

335

density = chainpayload / float(readdata)

336

else:

336

else:

337

density = 1.0

337

density = 1.0

338

339

# Cut the revs at collected indices

339

# Cut the revs at collected indices

340

previdx = 0

340

previdx = 0

341

while indicesheap:

341

while indicesheap:

342

idx = heapq.heappop(indicesheap)

342

idx = heapq.heappop(indicesheap)

343

344

chunk = _trimchunk(revlog, revs, previdx, idx)

344

chunk = _trimchunk(revlog, revs, previdx, idx)

345

if chunk:

345

if chunk:

346

yield chunk

346

yield chunk

347

348

previdx = idx

348

previdx = idx

349

350

chunk = _trimchunk(revlog, revs, previdx)

350

chunk = _trimchunk(revlog, revs, previdx)

351

if chunk:

351

if chunk:

352

yield chunk

352

yield chunk

353

354

def _trimchunk(revlog, revs, startidx, endidx=None):

354

def _trimchunk(revlog, revs, startidx, endidx=None):

355

"""returns revs[startidx:endidx] without empty trailing revs

355

"""returns revs[startidx:endidx] without empty trailing revs

356

357

Doctest Setup

357

Doctest Setup

358

>>> revlog = _testrevlog([

358

>>> revlog = _testrevlog([

359

... 5, #0

359

... 5, #0

360

... 10, #1

360

... 10, #1

361

... 12, #2

361

... 12, #2

362

... 12, #3 (empty)

362

... 12, #3 (empty)

363

... 17, #4

363

... 17, #4

364

... 21, #5

364

... 21, #5

365

... 21, #6 (empty)

365

... 21, #6 (empty)

366

... ])

366

... ])

367

368

Contiguous cases:

368

Contiguous cases:

369

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

369

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

370

[0, 1, 2, 3, 4, 5]

370

[0, 1, 2, 3, 4, 5]

371

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

371

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

372

[0, 1, 2, 3, 4]

372

[0, 1, 2, 3, 4]

373

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

373

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

374

[0, 1, 2]

374

[0, 1, 2]

375

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

375

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

376

[2]

376

[2]

377

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

377

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

378

[3, 4, 5]

378

[3, 4, 5]

379

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

379

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

380

[3, 4]

380

[3, 4]

381

382

Discontiguous cases:

382

Discontiguous cases:

383

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

383

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

384

[1, 3, 5]

384

[1, 3, 5]

385

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

385

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

386

[1]

386

[1]

387

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

387

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

388

[3, 5]

388

[3, 5]

389

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

389

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

390

[3, 5]

390

[3, 5]

391

"""

391

"""

392

length = revlog.length

392

length = revlog.length

393

394

if endidx is None:

394

if endidx is None:

395

endidx = len(revs)

395

endidx = len(revs)

396

397

# If we have a non-emtpy delta candidate, there are nothing to trim

397

# If we have a non-emtpy delta candidate, there are nothing to trim

398

if revs[endidx - 1] < len(revlog):

398

if revs[endidx - 1] < len(revlog):

399

# Trim empty revs at the end, except the very first revision of a chain

399

# Trim empty revs at the end, except the very first revision of a chain

400

while (endidx > 1

400

while (endidx > 1

401

and endidx > startidx

401

and endidx > startidx

402

and length(revs[endidx - 1]) == 0):

402

and length(revs[endidx - 1]) == 0):

403

endidx -= 1

403

endidx -= 1

404

405

return revs[startidx:endidx]

405

return revs[startidx:endidx]

406

407

def segmentspan(revlog, revs, deltainfo=None):

407

def segmentspan(revlog, revs, deltainfo=None):

408

"""Get the byte span of a segment of revisions

408

"""Get the byte span of a segment of revisions

409

410

revs is a sorted array of revision numbers

410

revs is a sorted array of revision numbers

411

412

>>> revlog = _testrevlog([

412

>>> revlog = _testrevlog([

413

... 5, #0

413

... 5, #0

414

... 10, #1

414

... 10, #1

415

... 12, #2

415

... 12, #2

416

... 12, #3 (empty)

416

... 12, #3 (empty)

417

... 17, #4

417

... 17, #4

418

... ])

418

... ])

419

420

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

420

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

421

17

421

17

422

>>> segmentspan(revlog, [0, 4])

422

>>> segmentspan(revlog, [0, 4])

423

17

423

17

424

>>> segmentspan(revlog, [3, 4])

424

>>> segmentspan(revlog, [3, 4])

425

5

425

5

426

>>> segmentspan(revlog, [1, 2, 3,])

426

>>> segmentspan(revlog, [1, 2, 3,])

427

7

427

7

428

>>> segmentspan(revlog, [1, 3])

428

>>> segmentspan(revlog, [1, 3])

429

7

429

7

430

"""

430

"""

431

if not revs:

431

if not revs:

432

return 0

432

return 0

433

if deltainfo is not None and len(revlog) <= revs[-1]:

433

if deltainfo is not None and len(revlog) <= revs[-1]:

434

if len(revs) == 1:

434

if len(revs) == 1:

435

return deltainfo.deltalen

435

return deltainfo.deltalen

436

offset = revlog.end(len(revlog) - 1)

436

offset = revlog.end(len(revlog) - 1)

437

end = deltainfo.deltalen + offset

437

end = deltainfo.deltalen + offset

438

else:

438

else:

439

end = revlog.end(revs[-1])

439

end = revlog.end(revs[-1])

440

return end - revlog.start(revs[0])

440

return end - revlog.start(revs[0])

441

442

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

442

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

443

"""build full text from a (base, delta) pair and other metadata"""

443

"""build full text from a (base, delta) pair and other metadata"""

444

# special case deltas which replace entire base; no need to decode

444

# special case deltas which replace entire base; no need to decode

445

# base revision. this neatly avoids censored bases, which throw when

445

# base revision. this neatly avoids censored bases, which throw when

446

# they're decoded.

446

# they're decoded.

447

hlen = struct.calcsize(">lll")

447

hlen = struct.calcsize(">lll")

448

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

448

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

449

len(delta) - hlen):

449

len(delta) - hlen):

450

fulltext = delta[hlen:]

450

fulltext = delta[hlen:]

451

else:

451

else:

452

# deltabase is rawtext before changed by flag processors, which is

452

# deltabase is rawtext before changed by flag processors, which is

453

# equivalent to non-raw text

453

# equivalent to non-raw text

454

basetext = revlog.revision(baserev, _df=fh, raw=False)

454

basetext = revlog.revision(baserev, _df=fh, raw=False)

455

fulltext = mdiff.patch(basetext, delta)

455

fulltext = mdiff.patch(basetext, delta)

456

457

try:

457

try:

458

res = revlog._processflags(fulltext, flags, 'read', raw=True)

458

res = revlog._processflags(fulltext, flags, 'read', raw=True)

459

fulltext, validatehash = res

459

fulltext, validatehash = res

460

if validatehash:

460

if validatehash:

461

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

461

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

462

if flags & REVIDX_ISCENSORED:

462

if flags & REVIDX_ISCENSORED:

463

raise RevlogError(_('node %s is not censored') % expectednode)

463

raise RevlogError(_('node %s is not censored') % expectednode)

464

except CensoredNodeError:

464

except CensoredNodeError:

465

# must pass the censored index flag to add censored revisions

465

# must pass the censored index flag to add censored revisions

466

if not flags & REVIDX_ISCENSORED:

466

if not flags & REVIDX_ISCENSORED:

467

raise

467

raise

468

return fulltext

468

return fulltext

469

470

@attr.s(slots=True, frozen=True)

470

@attr.s(slots=True, frozen=True)

471

class _deltainfo(object):

471

class _deltainfo(object):

472

distance = attr.ib()

472

distance = attr.ib()

473

deltalen = attr.ib()

473

deltalen = attr.ib()

474

data = attr.ib()

474

data = attr.ib()

475

base = attr.ib()

475

base = attr.ib()

476

chainbase = attr.ib()

476

chainbase = attr.ib()

477

chainlen = attr.ib()

477

chainlen = attr.ib()

478

compresseddeltalen = attr.ib()

478

compresseddeltalen = attr.ib()

479

snapshotdepth = attr.ib()

479

snapshotdepth = attr.ib()

480

481

def isgooddeltainfo(revlog, deltainfo, revinfo):

481

def isgooddeltainfo(revlog, deltainfo, revinfo):

482

"""Returns True if the given delta is good. Good means that it is within

482

"""Returns True if the given delta is good. Good means that it is within

483

the disk span, disk size, and chain length bounds that we know to be

483

the disk span, disk size, and chain length bounds that we know to be

484

performant."""

484

performant."""

485

if deltainfo is None:

485

if deltainfo is None:

486

return False

486

return False

487

488

# - 'deltainfo.distance' is the distance from the base revision --

488

# - 'deltainfo.distance' is the distance from the base revision --

489

# bounding it limits the amount of I/O we need to do.

489

# bounding it limits the amount of I/O we need to do.

490

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

490

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

491

# deltas we need to apply -- bounding it limits the amount of CPU

491

# deltas we need to apply -- bounding it limits the amount of CPU

492

# we consume.

492

# we consume.

493

494

if revlog._sparserevlog:

494

if revlog._sparserevlog:

495

# As sparse-read will be used, we can consider that the distance,

495

# As sparse-read will be used, we can consider that the distance,

496

# instead of being the span of the whole chunk,

496

# instead of being the span of the whole chunk,

497

# is the span of the largest read chunk

497

# is the span of the largest read chunk

498

base = deltainfo.base

498

base = deltainfo.base

499

500

if base != nullrev:

500

if base != nullrev:

501

deltachain = revlog._deltachain(base)[0]

501

deltachain = revlog._deltachain(base)[0]

502

else:

502

else:

503

deltachain = []

503

deltachain = []

504

505

# search for the first non-snapshot revision

505

# search for the first non-snapshot revision

506

for idx, r in enumerate(deltachain):

506

for idx, r in enumerate(deltachain):

507

if not revlog.issnapshot(r):

507

if not revlog.issnapshot(r):

508

break

508

break

509

deltachain = deltachain[idx:]

509

deltachain = deltachain[idx:]

510

chunks = slicechunk(revlog, deltachain, deltainfo)

510

chunks = slicechunk(revlog, deltachain, deltainfo)

511

all_span = [segmentspan(revlog, revs, deltainfo)

511

all_span = [segmentspan(revlog, revs, deltainfo)

512

for revs in chunks]

512

for revs in chunks]

513

distance = max(all_span)

513

distance = max(all_span)

514

else:

514

else:

515

distance = deltainfo.distance

515

distance = deltainfo.distance

516

517

textlen = revinfo.textlen

517

textlen = revinfo.textlen

518

defaultmax = textlen * 4

518

defaultmax = textlen * 4

519

maxdist = revlog._maxdeltachainspan

519

maxdist = revlog._maxdeltachainspan

520

if not maxdist:

520

if not maxdist:

521

maxdist = distance # ensure the conditional pass

521

maxdist = distance # ensure the conditional pass

522

maxdist = max(maxdist, defaultmax)

522

maxdist = max(maxdist, defaultmax)

523

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

523

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

524

# In multiple place, we are ignoring irrelevant data range below a

524

# In multiple place, we are ignoring irrelevant data range below a

525

# certain size. Be also apply this tradeoff here and relax span

525

# certain size. Be also apply this tradeoff here and relax span

526

# constraint for small enought content.

526

# constraint for small enought content.

527

maxdist = revlog._srmingapsize

527

maxdist = revlog._srmingapsize

528

529

# Bad delta from read span:

529

# Bad delta from read span:

530

#

530

#

531

# If the span of data read is larger than the maximum allowed.

531

# If the span of data read is larger than the maximum allowed.

532

if maxdist < distance:

532

if maxdist < distance:

533

return False

533

return False

534

535

# Bad delta from new delta size:

535

# Bad delta from new delta size:

536

#

536

#

537

# If the delta size is larger than the target text, storing the

537

# If the delta size is larger than the target text, storing the

538

# delta will be inefficient.

538

# delta will be inefficient.

539

if textlen < deltainfo.deltalen:

539

if textlen < deltainfo.deltalen:

540

return False

540

return False

541

542

# Bad delta from cumulated payload size:

542

# Bad delta from cumulated payload size:

543

#

543

#

544

# If the sum of delta get larger than K * target text length.

544

# If the sum of delta get larger than K * target text length.

545

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

545

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

546

return False

546

return False

547

548

# Bad delta from chain length:

548

# Bad delta from chain length:

549

#

549

#

550

# If the number of delta in the chain gets too high.

550

# If the number of delta in the chain gets too high.

551

if (revlog._maxchainlen

551

if (revlog._maxchainlen

552

and revlog._maxchainlen < deltainfo.chainlen):

552

and revlog._maxchainlen < deltainfo.chainlen):

553

return False

553

return False

554

555

# bad delta from intermediate snapshot size limit

555

# bad delta from intermediate snapshot size limit

556

#

556

#

557

# If an intermediate snapshot size is higher than the limit. The

557

# If an intermediate snapshot size is higher than the limit. The

558

# limit exist to prevent endless chain of intermediate delta to be

558

# limit exist to prevent endless chain of intermediate delta to be

559

# created.

559

# created.

560

if (deltainfo.snapshotdepth is not None and

560

if (deltainfo.snapshotdepth is not None and

561

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

561

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

562

return False

562

return False

563

564

# bad delta if new intermediate snapshot is larger than the previous

564

# bad delta if new intermediate snapshot is larger than the previous

565

# snapshot

565

# snapshot

566

if (deltainfo.snapshotdepth

566

if (deltainfo.snapshotdepth

567

and revlog.length(deltainfo.base) < deltainfo.deltalen):

567

and revlog.length(deltainfo.base) < deltainfo.deltalen):

568

return False

568

return False

569

570

return True

570

return True

571

572

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

572

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

573

"""Provides group of revision to be tested as delta base

573

"""Provides group of revision to be tested as delta base

574

575

This top level function focus on emitting groups with unique and worthwhile

575

This top level function focus on emitting groups with unique and worthwhile

576

content. See _raw_candidate_groups for details about the group order.

576

content. See _raw_candidate_groups for details about the group order.

577

"""

577

"""

578

# should we try to build a delta?

578

# should we try to build a delta?

579

if not (len(revlog) and revlog._storedeltachains):

579

if not (len(revlog) and revlog._storedeltachains):

580

yield None

580

yield None

581

return

581

return

582

583

deltalength = revlog.length

583

deltalength = revlog.length

584

deltaparent = revlog.deltaparent

584

deltaparent = revlog.deltaparent

585

good = None

585

good = None

586

587

deltas_limit = textlen * LIMIT_DELTA2TEXT

587

deltas_limit = textlen * LIMIT_DELTA2TEXT

588

589

tested = set([nullrev])

589

tested = set([nullrev])

590

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

590

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

591

while True:

591

while True:

592

temptative = candidates.send(good)

592

temptative = candidates.send(good)

593

if temptative is None:

593

if temptative is None:

594

break

594

break

595

group = []

595

group = []

596

for rev in temptative:

596

for rev in temptative:

597

# skip over empty delta (no need to include them in a chain)

597

# skip over empty delta (no need to include them in a chain)

598

while not (rev == nullrev or rev in tested or deltalength(rev)):

598

while not (rev == nullrev or rev in tested or deltalength(rev)):

599

tested.add(rev)

599

tested.add(rev)

600

rev = deltaparent(rev)

600

rev = deltaparent(rev)

601

# filter out revision we tested already

601

# filter out revision we tested already

602

if rev in tested:

602

if rev in tested:

603

continue

603

continue

604

tested.add(rev)

604

tested.add(rev)

605

# filter out delta base that will never produce good delta

605

# filter out delta base that will never produce good delta

606

if deltas_limit < revlog.length(rev):

606

if deltas_limit < revlog.length(rev):

607

continue

607

continue

608

# no need to try a delta against nullrev, this will be done as a

608

# no need to try a delta against nullrev, this will be done as a

609

# last resort.

609

# last resort.

610

if rev == nullrev:

610

if rev == nullrev:

611

continue

611

continue

612

# no delta for rawtext-changing revs (see "candelta" for why)

612

# no delta for rawtext-changing revs (see "candelta" for why)

613

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

613

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

614

continue

614

continue

615

group.append(rev)

615

group.append(rev)

616

if group:

616

if group:

617

# XXX: in the sparse revlog case, group can become large,

617

# XXX: in the sparse revlog case, group can become large,

618

# impacting performances. Some bounding or slicing mecanism

618

# impacting performances. Some bounding or slicing mecanism

619

# would help to reduce this impact.

619

# would help to reduce this impact.

620

good = yield tuple(group)

620

good = yield tuple(group)

621

yield None

621

yield None

622

623

def _findsnapshots(revlog, cache, start_rev):

623

def _findsnapshots(revlog, cache, start_rev):

624

"""find snapshot from start_rev to tip"""

624

"""find snapshot from start_rev to tip"""

625

deltaparent = revlog.deltaparent

625

deltaparent = revlog.deltaparent

626

issnapshot = revlog.issnapshot

626

issnapshot = revlog.issnapshot

627

for rev in revlog.revs(start_rev):

627

for rev in revlog.revs(start_rev):

628

if issnapshot(rev):

628

if issnapshot(rev):

629

cache[deltaparent(rev)].append(rev)

629

cache[deltaparent(rev)].append(rev)

630

631

def _refinedgroups(revlog, p1, p2, cachedelta):

631

def _refinedgroups(revlog, p1, p2, cachedelta):

632

good = None

632

good = None

633

# First we try to reuse a the delta contained in the bundle.

633

# First we try to reuse a the delta contained in the bundle.

634

# (or from the source revlog)

634

# (or from the source revlog)

635

#

635

#

636

# This logic only applies to general delta repositories and can be disabled

636

# This logic only applies to general delta repositories and can be disabled

637

# through configuration. Disabling reuse source delta is useful when

637

# through configuration. Disabling reuse source delta is useful when

638

# we want to make sure we recomputed "optimal" deltas.

638

# we want to make sure we recomputed "optimal" deltas.

639

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

639

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

640

# Assume what we received from the server is a good choice

640

# Assume what we received from the server is a good choice

641

# build delta will reuse the cache

641

# build delta will reuse the cache

642

good = yield (cachedelta[0],)

642

good = yield (cachedelta[0],)

643

if good is not None:

643

if good is not None:

644

yield None

644

yield None

645

return

645

return

646

for candidates in _rawgroups(revlog, p1, p2, cachedelta):

646

for candidates in _rawgroups(revlog, p1, p2, cachedelta):

647

good = yield candidates

647

good = yield candidates

648

if good is not None:

648

if good is not None:

649

break

649

break

650

651

# if we have a refinable value, try to refine it

651

# if we have a refinable value, try to refine it

652

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

652

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

653

# refine snapshot down

653

# refine snapshot down

654

previous = None

654

previous = None

655

while previous != good:

655

while previous != good:

656

previous = good

656

previous = good

657

base = revlog.deltaparent(good)

657

base = revlog.deltaparent(good)

658

if base == nullrev:

658

if base == nullrev:

659

break

659

break

660

good = yield (base,)

660

good = yield (base,)

661

# refine snapshot up

661

# refine snapshot up

662

#

662

#

663

# XXX the _findsnapshots call can be expensive and is "duplicated" with

663

# XXX the _findsnapshots call can be expensive and is "duplicated" with

664

# the one done in `_rawgroups`. Once we start working on performance,

664

# the one done in `_rawgroups`. Once we start working on performance,

665

# we should make the two logics share this computation.

665

# we should make the two logics share this computation.

666

snapshots = collections.defaultdict(list)

666

snapshots = collections.defaultdict(list)

667

_findsnapshots(revlog, snapshots, good + 1)

667

_findsnapshots(revlog, snapshots, good + 1)

668

previous = None

668

previous = None

669

while good != previous:

669

while good != previous:

670

previous = good

670

previous = good

671

children = tuple(sorted(c for c in snapshots[good]))

671

children = tuple(sorted(c for c in snapshots[good]))

672

good = yield children

672

good = yield children

673

674

# we have found nothing

674

# we have found nothing

675

yield None

675

yield None

676

677

def _rawgroups(revlog, p1, p2, cachedelta):

677

def _rawgroups(revlog, p1, p2, cachedelta):

678

"""Provides group of revision to be tested as delta base

678

"""Provides group of revision to be tested as delta base

679

680

This lower level function focus on emitting delta theorically interresting

680

This lower level function focus on emitting delta theorically interresting

681

without looking it any practical details.

681

without looking it any practical details.

682

683

The group order aims at providing fast or small candidates first.

683

The group order aims at providing fast or small candidates first.

684

"""

684

"""

685

gdelta = revlog._generaldelta

685

gdelta = revlog._generaldelta

686

sparse = revlog._sparserevlog

686

sparse = revlog._sparserevlog

687

curr = len(revlog)

687

curr = len(revlog)

688

prev = curr - 1

688

prev = curr - 1

689

deltachain = lambda rev: revlog._deltachain(rev)[0]

689

deltachain = lambda rev: revlog._deltachain(rev)[0]

690

691

if gdelta:

691

if gdelta:

692

# exclude already lazy tested base if any

692

# exclude already lazy tested base if any

693

parents = [p for p in (p1, p2) if p != nullrev]

693

parents = [p for p in (p1, p2) if p != nullrev]

694

695

if not revlog._deltabothparents and len(parents) == 2:

695

if not revlog._deltabothparents and len(parents) == 2:

696

parents.sort()

696

parents.sort()

697

# To minimize the chance of having to build a fulltext,

697

# To minimize the chance of having to build a fulltext,

698

# pick first whichever parent is closest to us (max rev)

698

# pick first whichever parent is closest to us (max rev)

699

yield (parents[1],)

699

yield (parents[1],)

700

# then the other one (min rev) if the first did not fit

700

# then the other one (min rev) if the first did not fit

701

yield (parents[0],)

701

yield (parents[0],)

702

elif len(parents) > 0:

702

elif len(parents) > 0:

703

# Test all parents (1 or 2), and keep the best candidate

703

# Test all parents (1 or 2), and keep the best candidate

704

yield parents

704

yield parents

705

706

if sparse and parents:

706

if sparse and parents:

707

snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev

707

snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev

708

# See if we can use an existing snapshot in the parent chains to use as

708

# See if we can use an existing snapshot in the parent chains to use as

709

# a base for a new intermediate-snapshot

709

# a base for a new intermediate-snapshot

710

#

710

#

711

# search for snapshot in parents delta chain

711

# search for snapshot in parents delta chain

712

# map: snapshot-level: snapshot-rev

712

# map: snapshot-level: snapshot-rev

713

parents_snaps = collections.defaultdict(set)

713

parents_snaps = collections.defaultdict(set)

714

candidate_chains = [deltachain(p) for p in parents]

714

candidate_chains = [deltachain(p) for p in parents]

715

for chain in candidate_chains:

715

for chain in candidate_chains:

716

for idx, s in enumerate(chain):

716

for idx, s in enumerate(chain):

717

if not revlog.issnapshot(s):

717

if not revlog.issnapshot(s):

718

break

718

break

719

parents_snaps[idx].add(s)

719

parents_snaps[idx].add(s)

720

snapfloor = min(parents_snaps[0]) + 1

720

snapfloor = min(parents_snaps[0]) + 1

721

_findsnapshots(revlog, snapshots, snapfloor)

721

_findsnapshots(revlog, snapshots, snapfloor)

722

# search for the highest "unrelated" revision

722

# search for the highest "unrelated" revision

723

#

723

#

724

# Adding snapshots used by "unrelated" revision increase the odd we

724

# Adding snapshots used by "unrelated" revision increase the odd we

725

# reuse an independant, yet better snapshot chain.

725

# reuse an independant, yet better snapshot chain.

726

#

726

#

727

# XXX instead of building a set of revisions, we could lazily enumerate

727

# XXX instead of building a set of revisions, we could lazily enumerate

728

# over the chains. That would be more efficient, however we stick to

728

# over the chains. That would be more efficient, however we stick to

729

# simple code for now.

729

# simple code for now.

730

all_revs = set()

730

all_revs = set()

731

for chain in candidate_chains:

731

for chain in candidate_chains:

732

all_revs.update(chain)

732

all_revs.update(chain)

733

other = None

733

other = None

734

for r in revlog.revs(prev, snapfloor):

734

for r in revlog.revs(prev, snapfloor):

735

if r not in all_revs:

735

if r not in all_revs:

736

other = r

736

other = r

737

break

737

break

738

if other is not None:

738

if other is not None:

739

# To avoid unfair competition, we won't use unrelated intermediate

739

# To avoid unfair competition, we won't use unrelated intermediate

740

# snapshot that are deeper than the ones from the parent delta

740

# snapshot that are deeper than the ones from the parent delta

741

# chain.

741

# chain.

742

max_depth = max(parents_snaps.keys())

742

max_depth = max(parents_snaps.keys())

743

chain = deltachain(other)

743

chain = deltachain(other)

744

for idx, s in enumerate(chain):

744

for idx, s in enumerate(chain):

745

if s < snapfloor:

745

if s < snapfloor:

746

continue

746

continue

747

if max_depth < idx:

747

if max_depth < idx:

748

break

748

break

749

if not revlog.issnapshot(s):

749

if not revlog.issnapshot(s):

750

break

750

break

751

parents_snaps[idx].add(s)

751

parents_snaps[idx].add(s)

752

# Test them as possible intermediate snapshot base

752

# Test them as possible intermediate snapshot base

753

# We test them from highest to lowest level. High level one are more

753

# We test them from highest to lowest level. High level one are more

754

# likely to result in small delta

754

# likely to result in small delta

755

floor = None

755

floor = None

756

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

756

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

757

siblings = set()

757

siblings = set()

758

for s in snaps:

758

for s in snaps:

759

siblings.update(snapshots[s])

759

siblings.update(snapshots[s])

760

# Before considering making a new intermediate snapshot, we check

760

# Before considering making a new intermediate snapshot, we check

761

# if an existing snapshot, children of base we consider, would be

761

# if an existing snapshot, children of base we consider, would be

762

# suitable.

762

# suitable.

763

#

763

#

764

# It give a change to reuse a delta chain "unrelated" to the

764

# It give a change to reuse a delta chain "unrelated" to the

765

# current revision instead of starting our own. Without such

765

# current revision instead of starting our own. Without such

766

# re-use, topological branches would keep reopening new chains.

766

# re-use, topological branches would keep reopening new chains.

767

# Creating more and more snapshot as the repository grow.

767

# Creating more and more snapshot as the repository grow.

768

769

if floor is not None:

769

if floor is not None:

770

# We only do this for siblings created after the one in our

770

# We only do this for siblings created after the one in our

771

# parent's delta chain. Those created before has less chances

771

# parent's delta chain. Those created before has less chances

772

# to be valid base since our ancestors had to create a new

772

# to be valid base since our ancestors had to create a new

773

# snapshot.

773

# snapshot.

774

siblings = [r for r in siblings if floor < r]

774

siblings = [r for r in siblings if floor < r]

775

yield tuple(sorted(siblings))

775

yield tuple(sorted(siblings))

776

# then test the base from our parent's delta chain.

776

# then test the base from our parent's delta chain.

777

yield tuple(sorted(snaps))

777

yield tuple(sorted(snaps))

778

floor = min(snaps)

778

floor = min(snaps)

779

# No suitable base found in the parent chain, search if any full

779

# No suitable base found in the parent chain, search if any full

780

# snapshots emitted since parent's base would be a suitable base for an

780

# snapshots emitted since parent's base would be a suitable base for an

781

# intermediate snapshot.

781

# intermediate snapshot.

782

#

782

#

783

# It give a chance to reuse a delta chain unrelated to the current

783

# It give a chance to reuse a delta chain unrelated to the current

784

# revisions instead of starting our own. Without such re-use,

784

# revisions instead of starting our own. Without such re-use,

785

# topological branches would keep reopening new full chains. Creating

785

# topological branches would keep reopening new full chains. Creating

786

# more and more snapshot as the repository grow.

786

# more and more snapshot as the repository grow.

787

yield tuple(snapshots[nullrev])

787

yield tuple(snapshots[nullrev])

788

789

if not sparse:

789

if not sparse:

790

# other approach failed try against prev to hopefully save us a

790

# other approach failed try against prev to hopefully save us a

791

# fulltext.

791

# fulltext.

792

yield (prev,)

792

yield (prev,)

793

794

class deltacomputer(object):

794

class deltacomputer(object):

795

def __init__(self, revlog):

795

def __init__(self, revlog):

796

self.revlog = revlog

796

self.revlog = revlog

797

798

def buildtext(self, revinfo, fh):

798

def buildtext(self, revinfo, fh):

799

"""Builds a fulltext version of a revision

799

"""Builds a fulltext version of a revision

800

801

revinfo: _revisioninfo instance that contains all needed info

801

revinfo: _revisioninfo instance that contains all needed info

802

fh: file handle to either the .i or the .d revlog file,

802

fh: file handle to either the .i or the .d revlog file,

803

depending on whether it is inlined or not

803

depending on whether it is inlined or not

804

"""

804

"""

805

btext = revinfo.btext

805

btext = revinfo.btext

806

if btext[0] is not None:

806

if btext[0] is not None:

807

return btext[0]

807

return btext[0]

808

809

revlog = self.revlog

809

revlog = self.revlog

810

cachedelta = revinfo.cachedelta

810

cachedelta = revinfo.cachedelta

811

baserev = cachedelta[0]

811

baserev = cachedelta[0]

812

delta = cachedelta[1]

812

delta = cachedelta[1]

813

814

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

814

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

815

revinfo.p1, revinfo.p2,

815

revinfo.p1, revinfo.p2,

816

revinfo.flags, revinfo.node)

816

revinfo.flags, revinfo.node)

817

return fulltext

817

return fulltext

818

819

def _builddeltadiff(self, base, revinfo, fh):

819

def _builddeltadiff(self, base, revinfo, fh):

820

revlog = self.revlog

820

revlog = self.revlog

821

t = self.buildtext(revinfo, fh)

821

t = self.buildtext(revinfo, fh)

822

if revlog.iscensored(base):

822

if revlog.iscensored(base):

823

# deltas based on a censored revision must replace the

823

# deltas based on a censored revision must replace the

824

# full content in one patch, so delta works everywhere

824

# full content in one patch, so delta works everywhere

825

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

825

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

826

delta = header + t

826

delta = header + t

827

else:

827

else:

828

ptext = revlog.revision(base, _df=fh, raw=True)

828

ptext = revlog.revision(base, _df=fh, raw=True)

829

delta = mdiff.textdiff(ptext, t)

829

delta = mdiff.textdiff(ptext, t)

830

831

return delta

831

return delta

832

833

def _builddeltainfo(self, revinfo, base, fh):

833

def _builddeltainfo(self, revinfo, base, fh):

834

# can we use the cached delta?

834

# can we use the cached delta?

835

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

835

delta = None

836

~~delta~~ = revinfo.cachedelta[1]

836

if revinfo.cachedelta:

837

else:

837

cachebase, cachediff = revinfo.cachedelta

838

#check if the diff still apply

839

currentbase = cachebase

840

while (currentbase != nullrev

841

and currentbase != base

842

and self.revlog.length(currentbase) == 0):

843

currentbase = self.revlog.deltaparent(currentbase)

844

if currentbase == base:

845

delta = revinfo.cachedelta[1]

846

if delta is None:

838

delta = self._builddeltadiff(base, revinfo, fh)

847

delta = self._builddeltadiff(base, revinfo, fh)

839

revlog = self.revlog

848

revlog = self.revlog

840

header, data = revlog.compress(delta)

849

header, data = revlog.compress(delta)

841

deltalen = len(header) + len(data)

850

deltalen = len(header) + len(data)

842

chainbase = revlog.chainbase(base)

851

chainbase = revlog.chainbase(base)

843

offset = revlog.end(len(revlog) - 1)

852

offset = revlog.end(len(revlog) - 1)

844

dist = deltalen + offset - revlog.start(chainbase)

853

dist = deltalen + offset - revlog.start(chainbase)

845

if revlog._generaldelta:

854

if revlog._generaldelta:

846

deltabase = base

855

deltabase = base

847

else:

856

else:

848

deltabase = chainbase

857

deltabase = chainbase

849

chainlen, compresseddeltalen = revlog._chaininfo(base)

858

chainlen, compresseddeltalen = revlog._chaininfo(base)

850

chainlen += 1

859

chainlen += 1

851

compresseddeltalen += deltalen

860

compresseddeltalen += deltalen

852

861

853

revlog = self.revlog

862

revlog = self.revlog

854

snapshotdepth = None

863

snapshotdepth = None

855

if deltabase == nullrev:

864

if deltabase == nullrev:

856

snapshotdepth = 0

865

snapshotdepth = 0

857

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

866

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

858

# A delta chain should always be one full snapshot,

867

# A delta chain should always be one full snapshot,

859

# zero or more semi-snapshots, and zero or more deltas

868

# zero or more semi-snapshots, and zero or more deltas

860

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

869

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

861

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

870

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

862

snapshotdepth = len(revlog._deltachain(deltabase)[0])

871

snapshotdepth = len(revlog._deltachain(deltabase)[0])

863

872

864

return _deltainfo(dist, deltalen, (header, data), deltabase,

873

return _deltainfo(dist, deltalen, (header, data), deltabase,

865

chainbase, chainlen, compresseddeltalen,

874

chainbase, chainlen, compresseddeltalen,

866

snapshotdepth)

875

snapshotdepth)

867

876

868

def _fullsnapshotinfo(self, fh, revinfo):

877

def _fullsnapshotinfo(self, fh, revinfo):

869

curr = len(self.revlog)

878

curr = len(self.revlog)

870

rawtext = self.buildtext(revinfo, fh)

879

rawtext = self.buildtext(revinfo, fh)

871

data = self.revlog.compress(rawtext)

880

data = self.revlog.compress(rawtext)

872

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

881

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

873

deltabase = chainbase = curr

882

deltabase = chainbase = curr

874

snapshotdepth = 0

883

snapshotdepth = 0

875

chainlen = 1

884

chainlen = 1

876

885

877

return _deltainfo(dist, deltalen, data, deltabase,

886

return _deltainfo(dist, deltalen, data, deltabase,

878

chainbase, chainlen, compresseddeltalen,

887

chainbase, chainlen, compresseddeltalen,

879

snapshotdepth)

888

snapshotdepth)

880

889

881

def finddeltainfo(self, revinfo, fh):

890

def finddeltainfo(self, revinfo, fh):

882

"""Find an acceptable delta against a candidate revision

891

"""Find an acceptable delta against a candidate revision

883

892

884

revinfo: information about the revision (instance of _revisioninfo)

893

revinfo: information about the revision (instance of _revisioninfo)

885

fh: file handle to either the .i or the .d revlog file,

894

fh: file handle to either the .i or the .d revlog file,

886

depending on whether it is inlined or not

895

depending on whether it is inlined or not

887

896

888

Returns the first acceptable candidate revision, as ordered by

897

Returns the first acceptable candidate revision, as ordered by

889

_candidategroups

898

_candidategroups

890

899

891

If no suitable deltabase is found, we return delta info for a full

900

If no suitable deltabase is found, we return delta info for a full

892

snapshot.

901

snapshot.

893

"""

902

"""

894

if not revinfo.textlen:

903

if not revinfo.textlen:

895

return self._fullsnapshotinfo(fh, revinfo)

904

return self._fullsnapshotinfo(fh, revinfo)

896

905

897

# no delta for flag processor revision (see "candelta" for why)

906

# no delta for flag processor revision (see "candelta" for why)

898

# not calling candelta since only one revision needs test, also to

907

# not calling candelta since only one revision needs test, also to

899

# avoid overhead fetching flags again.

908

# avoid overhead fetching flags again.

900

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

909

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

901

return self._fullsnapshotinfo(fh, revinfo)

910

return self._fullsnapshotinfo(fh, revinfo)

902

911

903

cachedelta = revinfo.cachedelta

912

cachedelta = revinfo.cachedelta

904

p1 = revinfo.p1

913

p1 = revinfo.p1

905

p2 = revinfo.p2

914

p2 = revinfo.p2

906

revlog = self.revlog

915

revlog = self.revlog

907

916

908

deltainfo = None

917

deltainfo = None

909

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

918

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

910

groups = _candidategroups(self.revlog, revinfo.textlen,

919

groups = _candidategroups(self.revlog, revinfo.textlen,

911

p1r, p2r, cachedelta)

920

p1r, p2r, cachedelta)

912

candidaterevs = next(groups)

921

candidaterevs = next(groups)

913

while candidaterevs is not None:

922

while candidaterevs is not None:

914

nominateddeltas = []

923

nominateddeltas = []

915

if deltainfo is not None:

924

if deltainfo is not None:

916

# if we already found a good delta,

925

# if we already found a good delta,

917

# challenge it against refined candidates

926

# challenge it against refined candidates

918

nominateddeltas.append(deltainfo)

927

nominateddeltas.append(deltainfo)

919

for candidaterev in candidaterevs:

928

for candidaterev in candidaterevs:

920

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

929

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

921

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

930

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

922

nominateddeltas.append(candidatedelta)

931

nominateddeltas.append(candidatedelta)

923

if nominateddeltas:

932

if nominateddeltas:

924

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

933

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

925

if deltainfo is not None:

934

if deltainfo is not None:

926

candidaterevs = groups.send(deltainfo.base)

935

candidaterevs = groups.send(deltainfo.base)

927

else:

936

else:

928

candidaterevs = next(groups)

937

candidaterevs = next(groups)

929

938

930

if deltainfo is None:

939

if deltainfo is None:

931

deltainfo = self._fullsnapshotinfo(fh, revinfo)

940

deltainfo = self._fullsnapshotinfo(fh, revinfo)

932

return deltainfo

941

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
             import collections
             import heapq
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
             )
             RevlogError = error.RevlogError
             CensoredNodeError = error.CensoredNodeError
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                 def start(self, rev):
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
             def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.
                 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
                 For consistency with other slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.
                 If individual revisions chunk are larger than this limit, they will still
                 be raised individually.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 Slicing with a maximum chunk size
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 """
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)
                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when reading,
                 # * deltainfo is used to pick are good delta chain when writing.
                 if not (deltainfo is None or targetsize is None):
                     msg = 'cannot use `targetsize` with a `deltainfo`'
                     raise error.ProgrammingError(msg)
                 for chunk in _slicechunktodensity(revlog, revs,
                                                   deltainfo,
                                                   revlog._srdensitythreshold,
                                                   revlog._srmingapsize):
                     for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                         yield subchunk
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> revlog = _testrevlog([
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ])
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 startdata = revlog.start(revs[0])
                 endrevidx = 0
                 iterrevs = enumerate(revs)
                 next(iterrevs) # skip first rev.
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     if span <= targetsize:
                         endrevidx = idx
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx
                 yield _trimchunk(revlog, revs, startrevidx)
             def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                                      mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 ``deltainfo`` is a _deltainfo instance of a revision that we would append
                 to the top of the revlog.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 nextrev = len(revlog)
                 nextoffset = revlog.end(nextrev - 1)
                 if deltainfo is None:
                     deltachainspan = segmentspan(revlog, revs)
                     chainpayload = sum(length(r) for r in revs)
                 else:
                     deltachainspan = deltainfo.distance
                     chainpayload = deltainfo.compresseddeltalen
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 if deltainfo is not None and deltainfo.deltalen:
                     revs = list(revs)
                     revs.append(nextrev)
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gapsheap = []
                 heapq.heapify(gapsheap)
                 prevend = None
                 for i, rev in enumerate(revs):
                     if rev < nextrev:
                         revstart = start(rev)
                         revlen = length(rev)
                     else:
                         revstart = nextoffset
                         revlen = deltainfo.deltalen
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             heapq.heappush(gapsheap, (-gapsize, i))
                     prevend = revstart + revlen
                 # Collect the indices of the largest holes until the density is acceptable
                 indicesheap = []
                 heapq.heapify(indicesheap)
                 while gapsheap and density < targetdensity:
                     oppgapsize, gapidx = heapq.heappop(gapsheap)
                     heapq.heappush(indicesheap, gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= (-oppgapsize)
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 # Cut the revs at collected indices
                 previdx = 0
                 while indicesheap:
                     idx = heapq.heappop(indicesheap)
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (endidx > 1
                             and endidx > startidx
                             and length(revs[endidx - 1]) == 0):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs, deltainfo=None):
                 """Get the byte span of a segment of revisions
                 revs is a sorted array of revision numbers
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])
                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 >>> segmentspan(revlog, [0, 4])
                 >>> segmentspan(revlog, [3, 4])
                 >>> segmentspan(revlog, [1, 2, 3,])
                 >>> segmentspan(revlog, [1, 3])
                 """
                 if not revs:
                     return 0
                 if deltainfo is not None and len(revlog) <= revs[-1]:
                     if len(revs) == 1:
                         return deltainfo.deltalen
                     offset = revlog.end(len(revlog) - 1)
                     end = deltainfo.deltalen + offset
                 else:
                     end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata"""
                 # special case deltas which replace entire base; no need to decode
                 # base revision. this neatly avoids censored bases, which throw when
                 # they're decoded.
                 hlen = struct.calcsize(">lll")
                 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                            len(delta) - hlen):
                     fulltext = delta[hlen:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     fulltext = mdiff.patch(basetext, delta)
                 try:
                     res = revlog._processflags(fulltext, flags, 'read', raw=True)
                     fulltext, validatehash = res
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise RevlogError(_('node %s is not censored') % expectednode)
                 except CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 distance = attr.ib()
                 deltalen = attr.ib()
                 data = attr.ib()
                 base = attr.ib()
                 chainbase = attr.ib()
                 chainlen = attr.ib()
                 compresseddeltalen = attr.ib()
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant."""
                 if deltainfo is None:
                     return False
                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.
                 if revlog._sparserevlog:
                     # As sparse-read will be used, we can consider that the distance,
                     # instead of being the span of the whole chunk,
                     # is the span of the largest read chunk
                     base = deltainfo.base
                     if base != nullrev:
                         deltachain = revlog._deltachain(base)[0]
                     else:
                         deltachain = []
                     # search for the first non-snapshot revision
                     for idx, r in enumerate(deltachain):
                         if not revlog.issnapshot(r):
                             break
                     deltachain = deltachain[idx:]
                     chunks = slicechunk(revlog, deltachain, deltainfo)
                     all_span = [segmentspan(revlog, revs, deltainfo)
                                 for revs in chunks]
                     distance = max(all_span)
                 else:
                     distance = deltainfo.distance
                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = distance # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
                     # In multiple place, we are ignoring irrelevant data range below a
                     # certain size. Be also apply this tradeoff here and relax span
                     # constraint for small enought content.
                     maxdist = revlog._srmingapsize
                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 if maxdist < distance:
                     return False
                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False
                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False
                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (revlog._maxchainlen
                         and revlog._maxchainlen < deltainfo.chainlen):
                     return False
                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (deltainfo.snapshotdepth is not None and
                         (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
                     return False
                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (deltainfo.snapshotdepth
                         and revlog.length(deltainfo.base) < deltainfo.deltalen):
                     return False
                 return True
             def _candidategroups(revlog, textlen, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _raw_candidate_groups for details about the group order.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     yield None
                     return
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 good = None
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
                 tested = set([nullrev])
                 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
                 while True:
                     temptative = candidates.send(good)
                     if temptative is None:
                         break
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while not (rev == nullrev or rev in tested or deltalength(rev)):
                             tested.add(rev)
                             rev = deltaparent(rev)
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         tested.add(rev)
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             continue
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             continue
                         group.append(rev)
                     if group:
                         # XXX: in the sparse revlog case, group can become large,
                         #      impacting performances. Some bounding or slicing mecanism
                         #      would help to reduce this impact.
                         good = yield tuple(group)
                 yield None
             def _findsnapshots(revlog, cache, start_rev):
                 """find snapshot from start_rev to tip"""
                 deltaparent = revlog.deltaparent
                 issnapshot = revlog.issnapshot
                 for rev in revlog.revs(start_rev):
                     if issnapshot(rev):
                         cache[deltaparent(rev)].append(rev)
             def _refinedgroups(revlog, p1, p2, cachedelta):
                 good = None
                 # First we try to reuse a the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     good = yield (cachedelta[0],)
                     if good is not None:
                         yield None
                         return
                 for candidates in _rawgroups(revlog, p1, p2, cachedelta):
                     good = yield candidates
                     if good is not None:
                         break
                 # if we have a refinable value, try to refine it
                 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
                     # refine snapshot down
                     previous = None
                     while previous != good:
                         previous = good
                         base = revlog.deltaparent(good)
                         if base == nullrev:
                             break
                         good = yield (base,)
                     # refine snapshot up
                     #
                     # XXX the _findsnapshots call can be expensive and is "duplicated" with
                     # the one done in `_rawgroups`. Once we start working on performance,
                     # we should make the two logics share this computation.
                     snapshots = collections.defaultdict(list)
                     _findsnapshots(revlog, snapshots, good + 1)
                     previous = None
                     while good != previous:
                         previous = good
                         children = tuple(sorted(c for c in snapshots[good]))
                         good = yield children
                 # we have found nothing
                 yield None
             def _rawgroups(revlog, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 gdelta = revlog._generaldelta
                 sparse = revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 if sparse and parents:
                     snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     #
                     # search for snapshot in parents delta chain
                     # map: snapshot-level: snapshot-rev
                     parents_snaps = collections.defaultdict(set)
                     candidate_chains = [deltachain(p) for p in parents]
                     for chain in candidate_chains:
                         for idx, s in enumerate(chain):
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     snapfloor = min(parents_snaps[0]) + 1
                     _findsnapshots(revlog, snapshots, snapfloor)
                     # search for the highest "unrelated" revision
                     #
                     # Adding snapshots used by "unrelated" revision increase the odd we
                     # reuse an independant, yet better snapshot chain.
                     #
                     # XXX instead of building a set of revisions, we could lazily enumerate
                     # over the chains. That would be more efficient, however we stick to
                     # simple code for now.
                     all_revs = set()
                     for chain in candidate_chains:
                         all_revs.update(chain)
                     other = None
                     for r in revlog.revs(prev, snapfloor):
                         if r not in all_revs:
                             other = r
                             break
                     if other is not None:
                         # To avoid unfair competition, we won't use unrelated intermediate
                         # snapshot that are deeper than the ones from the parent delta
                         # chain.
                         max_depth = max(parents_snaps.keys())
                         chain = deltachain(other)
                         for idx, s in enumerate(chain):
                             if s < snapfloor:
                                 continue
                             if max_depth < idx:
                                 break
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     # Test them as possible intermediate snapshot base
                     # We test them from highest to lowest level. High level one are more
                     # likely to result in small delta
                     floor = None
                     for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                         siblings = set()
                         for s in snaps:
                             siblings.update(snapshots[s])
                         # Before considering making a new intermediate snapshot, we check
                         # if an existing snapshot, children of base we consider, would be
                         # suitable.
                         #
                         # It give a change to reuse a delta chain "unrelated" to the
                         # current revision instead of starting our own. Without such
                         # re-use, topological branches would keep reopening new chains.
                         # Creating more and more snapshot as the repository grow.
                         if floor is not None:
                             # We only do this for siblings created after the one in our
                             # parent's delta chain. Those created before has less chances
                             # to be valid base since our ancestors had to create a new
                             # snapshot.
                             siblings = [r for r in siblings if floor < r]
                         yield tuple(sorted(siblings))
                         # then test the base from our parent's delta chain.
                         yield tuple(sorted(snaps))
                         floor = min(snaps)
                     # No suitable base found in the parent chain, search if any full
                     # snapshots emitted since parent's base would be a suitable base for an
                     # intermediate snapshot.
                     #
                     # It give a chance to reuse a delta chain unrelated to the current
                     # revisions instead of starting our own. Without such re-use,
                     # topological branches would keep reopening new full chains. Creating
                     # more and more snapshot as the repository grow.
                     yield tuple(snapshots[nullrev])
                 if not sparse:
                     # other approach failed try against prev to hopefully save us a
                     # fulltext.
                     yield (prev,)
             class deltacomputer(object):
                 def __init__(self, revlog):
                     self.revlog = revlog
                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision
                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return fulltext
                 def _builddeltadiff(self, base, revinfo, fh):
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.revision(base, _df=fh, raw=True)
                         delta = mdiff.textdiff(ptext, t)
                     return delta
                 def _builddeltainfo(self, revinfo, base, fh):
                     # can we use the cached delta?
-                    if revinfo.cachedelta and revinfo.cachedelta[0] == base:
+                    delta = None
-                        delta = revinfo.cachedelta[1]
+                    if revinfo.cachedelta:
-                    else:
+                        cachebase, cachediff = revinfo.cachedelta
+                        #check if the diff still apply
+                        currentbase = cachebase
+                        while (currentbase != nullrev
+                                and currentbase != base
+                                and self.revlog.length(currentbase) == 0):
+                            currentbase = self.revlog.deltaparent(currentbase)
+                        if currentbase == base:
+                            delta = revinfo.cachedelta[1]
+                    if delta is None:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     revlog = self.revlog
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     chainbase = revlog.chainbase(base)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     revlog = self.revlog
                     snapshotdepth = None
                     if deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def _fullsnapshotinfo(self, fh, revinfo):
                     curr = len(self.revlog)
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(dist, deltalen, data, deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     """
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
                     deltainfo = None
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     groups = _candidategroups(self.revlog, revinfo.textlen,
                                                          p1r, p2r, cachedelta)
                     candidaterevs = next(groups)
                     while candidaterevs is not None:
                         nominateddeltas = []
                         if deltainfo is not None:
                             # if we already found a good delta,
                             # challenge it against refined candidates
                             nominateddeltas.append(deltainfo)
                         for candidaterev in candidaterevs:
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                 nominateddeltas.append(candidatedelta)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                         if deltainfo is not None:
                             candidaterevs = groups.send(deltainfo.base)
                         else:
                             candidaterevs = next(groups)
                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo