##// END OF EJS Templates
revlog: limit base to rev size ratio to 500 instead of 50...
Boris Feld -
r41062:b3734779 default
parent child Browse files
Show More
@@ -1,1000 +1,1000 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46 self._snapshot = set(snapshot)
47 self.index = None
47 self.index = None
48
48
49 def start(self, rev):
49 def start(self, rev):
50 if rev == 0:
50 if rev == 0:
51 return 0
51 return 0
52 return self._data[rev - 1]
52 return self._data[rev - 1]
53
53
54 def end(self, rev):
54 def end(self, rev):
55 return self._data[rev]
55 return self._data[rev]
56
56
57 def length(self, rev):
57 def length(self, rev):
58 return self.end(rev) - self.start(rev)
58 return self.end(rev) - self.start(rev)
59
59
60 def __len__(self):
60 def __len__(self):
61 return len(self._data)
61 return len(self._data)
62
62
63 def issnapshot(self, rev):
63 def issnapshot(self, rev):
64 return rev in self._snapshot
64 return rev in self._snapshot
65
65
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        # never enforce a chunk size smaller than the minimum gap size,
        # otherwise the size limit would contradict the density slicing
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # pure-python fallback when the index (e.g. the C implementation)
        # does not provide density slicing itself
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(revs,
                                revlog._srdensitythreshold,
                                revlog._srmingapsize):
        # density slicing can still produce chunks larger than `targetsize`;
        # slice those further to honor the maximum read size
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
132
132
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read guarantee of revlog. This might
    happen when "minimal gap size" interrupted the slicing or when chains are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # nothing to do when the whole span already fits within the target
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # snapshot still fits in the current chunk; extend it
            endrevidx = idx + 1
        else:
            # flush the accumulated snapshot run and restart a chunk at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # non-snapshot revision reached: hand the rest over to the
            # binary-slicing phase below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break # protect against individual chunk larger than limit
            # halve the candidate range until it fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last slice point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
286
286
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size cannot be usefully sliced
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Gather the gaps (as (size, index) pairs) between consecutive
    # non-empty revisions; they are sorted below so the largest can be
    # popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        # list is sorted ascending, so pop() returns the largest gap
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read,
        # which raises the density accordingly
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
412
412
413 def _trimchunk(revlog, revs, startidx, endidx=None):
413 def _trimchunk(revlog, revs, startidx, endidx=None):
414 """returns revs[startidx:endidx] without empty trailing revs
414 """returns revs[startidx:endidx] without empty trailing revs
415
415
416 Doctest Setup
416 Doctest Setup
417 >>> revlog = _testrevlog([
417 >>> revlog = _testrevlog([
418 ... 5, #0
418 ... 5, #0
419 ... 10, #1
419 ... 10, #1
420 ... 12, #2
420 ... 12, #2
421 ... 12, #3 (empty)
421 ... 12, #3 (empty)
422 ... 17, #4
422 ... 17, #4
423 ... 21, #5
423 ... 21, #5
424 ... 21, #6 (empty)
424 ... 21, #6 (empty)
425 ... ])
425 ... ])
426
426
427 Contiguous cases:
427 Contiguous cases:
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
429 [0, 1, 2, 3, 4, 5]
429 [0, 1, 2, 3, 4, 5]
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
431 [0, 1, 2, 3, 4]
431 [0, 1, 2, 3, 4]
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
433 [0, 1, 2]
433 [0, 1, 2]
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
435 [2]
435 [2]
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
437 [3, 4, 5]
437 [3, 4, 5]
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
439 [3, 4]
439 [3, 4]
440
440
441 Discontiguous cases:
441 Discontiguous cases:
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
443 [1, 3, 5]
443 [1, 3, 5]
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
445 [1]
445 [1]
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
447 [3, 5]
447 [3, 5]
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
449 [3, 5]
449 [3, 5]
450 """
450 """
451 length = revlog.length
451 length = revlog.length
452
452
453 if endidx is None:
453 if endidx is None:
454 endidx = len(revs)
454 endidx = len(revs)
455
455
456 # If we have a non-emtpy delta candidate, there are nothing to trim
456 # If we have a non-emtpy delta candidate, there are nothing to trim
457 if revs[endidx - 1] < len(revlog):
457 if revs[endidx - 1] < len(revlog):
458 # Trim empty revs at the end, except the very first revision of a chain
458 # Trim empty revs at the end, except the very first revision of a chain
459 while (endidx > 1
459 while (endidx > 1
460 and endidx > startidx
460 and endidx > startidx
461 and length(revs[endidx - 1]) == 0):
461 and length(revs[endidx - 1]) == 0):
462 endidx -= 1
462 endidx -= 1
463
463
464 return revs[startidx:endidx]
464 return revs[startidx:endidx]
465
465
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    first, last = revs[0], revs[-1]
    return revlog.end(last) - revlog.start(first)
494
494
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the file handle passed down to ``revlog.revision`` for reading
    the base text, ``baserev`` the revision the delta applies to, ``p1``/``p2``
    the parent nodes used for hash checking, and ``expectednode`` the node the
    rebuilt text must hash to when flag processors request validation.

    Raises error.StorageError when ``flags`` claims the revision is censored
    but decoding did not raise CensoredNodeError; re-raises CensoredNodeError
    unless the censored flag was passed.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        # full-replacement delta: the payload after the header IS the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # a truly censored revision would have raised CensoredNodeError
            # above, so reaching here means the flag is a lie
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
523
523
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing a candidate delta for a revision."""
    # distance from the base revision -- bounding it limits the amount of
    # I/O needed (see isgooddeltainfo)
    distance = attr.ib()
    # size of the delta itself, in bytes -- presumably post-compression;
    # TODO confirm against the producer of these objects
    deltalen = attr.ib()
    # the delta payload itself
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain this delta extends
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # sum of the total size of deltas needed to rebuild the text --
    # bounding it limits the amount of CPU consumed (see isgooddeltainfo)
    compresseddeltalen = attr.ib()
    # snapshot depth when this delta forms an intermediate snapshot --
    # NOTE(review): semantics not visible in this chunk, confirm upstream
    snapshotdepth = attr.ib()
534
534
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # a missing candidate can never be good
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    fulltextlen = revinfo.textlen

    # Compute the maximum span of data we accept reading from disk; when
    # no explicit limit is configured, seed it with the candidate's own
    # distance so the span check below cannot fire on its own.
    span_limit = revlog._maxdeltachainspan
    if not span_limit:
        span_limit = deltainfo.distance
    span_limit = max(span_limit, fulltextlen * 4)

    # Bad delta from read span:
    #
    # The span of data read is larger than the maximum allowed. In the
    # sparse-revlog case, we rely on the associated "sparse reading" to
    # avoid issues related to the span of data, so the check is skipped.
    if not revlog._sparserevlog and span_limit < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target text would be inefficient.
    if fulltextlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of deltas along the chain must stay below
    # K * target text length.
    if fulltextlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # The number of deltas in the chain would get too high.
    chain_cap = revlog._maxchainlen
    if chain_cap and chain_cap < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # Intermediate snapshots are bounded (shrinking with depth) to
    # prevent endless chains of intermediate deltas from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (fulltextlen >> depth) < deltainfo.deltalen:
        return False

    # Bad delta when the new intermediate snapshot would be larger than
    # the previous snapshot it is based on.
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
603
603
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
#
# This is a ratio: a base is skipped when its rawsize is smaller than
# textlen // LIMIT_BASE2TEXT (see _candidategroups).
LIMIT_BASE2TEXT = 500
608
608
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator using the ``send()`` protocol: after testing a
    group, the caller sends back the base it settled on (or None) so
    following groups can be refined. The generator terminates by yielding
    None.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # nullrev is pre-seeded: delta against nullrev is the full-snapshot
    # fallback and never needs testing here.
    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta:
            # its own stored size already exceeds the cumulated-delta budget
            if deltas_limit < revlog.length(rev):
                continue
            # filter out bases whose full text is vastly smaller than the
            # target text (ratio bound, see LIMIT_BASE2TEXT)
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mecanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
676
676
677 def _findsnapshots(revlog, cache, start_rev):
677 def _findsnapshots(revlog, cache, start_rev):
678 """find snapshot from start_rev to tip"""
678 """find snapshot from start_rev to tip"""
679 deltaparent = revlog.deltaparent
679 deltaparent = revlog.deltaparent
680 issnapshot = revlog.issnapshot
680 issnapshot = revlog.issnapshot
681 for rev in revlog.revs(start_rev):
681 for rev in revlog.revs(start_rev):
682 if issnapshot(rev):
682 if issnapshot(rev):
683 cache[deltaparent(rev)].append(rev)
683 cache[deltaparent(rev)].append(rev)
684
684
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate-base groups, refining them from caller feedback.

    Generator driven through ``send()``: the value sent back is the base
    the caller picked from the previous group (or None). Terminates by
    yielding None.
    """
    good = None
    # First we try to reuse a the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot
        # chain, one base at a time, as long as the caller accepts them
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the children snapshots of the accepted
        # base, walking away from the root
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
735
735
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `gdelta` branch
    # above; this relies on sparse-revlog implying general delta — confirm.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
852
852
class deltacomputer(object):
    """Builds candidate deltas for revisions and picks an acceptable one."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in revinfo.btext[0]. When no fulltext is
        cached yet, revinfo.cachedelta must be set so the text can be
        rebuilt from the cached (base, delta) pair.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return a raw delta turning `base`'s text into `revinfo`'s text."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing a delta of `revinfo` against
        revision `base`."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check whether the cached diff still applies against `base`,
            # skipping over empty intermediate deltas
            currentbase = cachebase
            while (currentbase != nullrev
                    and currentbase != base
                    and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # span of bytes read from the start of the chain base to the end
        # of this new delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a _deltainfo storing the revision as a full snapshot
        (base is the revision itself, chain of length 1)."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        # empty revisions are always stored as full snapshots
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        # _candidategroups is a generator: after testing a group we send()
        # back the base we settled on (or call next() when none was good)
        # so it can refine the following groups.
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the candidate with the smallest compressed delta
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now