upstream/mercurial-mirror Commit - r43147:1ebf1a1e

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import collections

12

import collections

13

import struct

13

import struct

14

15

# import stuff from node for others to import from revlog

15

# import stuff from node for others to import from revlog

16

from ..node import (

16

from ..node import (

17

nullrev,

17

nullrev,

18

)

18

)

19

from ..i18n import _

19

from ..i18n import _

20

21

from .constants import (

21

from .constants import (

22

REVIDX_ISCENSORED,

22

REVIDX_ISCENSORED,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

)

24

)

25

26

from ..thirdparty import (

26

from ..thirdparty import (

27

attr,

27

attr,

28

)

28

)

29

30

from .. import (

30

from .. import (

31

error,

31

error,

32

mdiff,

32

mdiff,

33

util,

33

util,

34

)

34

)

35

36

# maximum <delta-chain-data>/<revision-text-length> ratio

36

# maximum <delta-chain-data>/<revision-text-length> ratio

37

LIMIT_DELTA2TEXT = 2

37

LIMIT_DELTA2TEXT = 2

38

39

class _testrevlog(object):

39

class _testrevlog(object):

40

"""minimalist fake revlog to use in doctests"""

40

"""minimalist fake revlog to use in doctests"""

41

42

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

42

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

43

"""data is an list of revision payload boundaries"""

43

"""data is an list of revision payload boundaries"""

44

self._data = data

44

self._data = data

45

self._srdensitythreshold = density

45

self._srdensitythreshold = density

46

self._srmingapsize = mingap

46

self._srmingapsize = mingap

47

self._snapshot = set(snapshot)

47

self._snapshot = set(snapshot)

48

self.index = None

48

self.index = None

49

50

def start(self, rev):

50

def start(self, rev):

51

if rev == nullrev:

51

if rev == nullrev:

52

return 0

52

return 0

53

if rev == 0:

53

if rev == 0:

54

return 0

54

return 0

55

return self._data[rev - 1]

55

return self._data[rev - 1]

56

57

def end(self, rev):

57

def end(self, rev):

58

if rev == nullrev:

58

if rev == nullrev:

59

return 0

59

return 0

60

return self._data[rev]

60

return self._data[rev]

61

62

def length(self, rev):

62

def length(self, rev):

63

return self.end(rev) - self.start(rev)

63

return self.end(rev) - self.start(rev)

64

65

def __len__(self):

65

def __len__(self):

66

return len(self._data)

66

return len(self._data)

67

68

def issnapshot(self, rev):

68

def issnapshot(self, rev):

69

if rev == nullrev:

69

if rev == nullrev:

70

return True

70

return True

71

return rev in self._snapshot

71

return rev in self._snapshot

72

73

def slicechunk(revlog, revs, targetsize=None):

73

def slicechunk(revlog, revs, targetsize=None):

74

"""slice revs to reduce the amount of unrelated data to be read from disk.

74

"""slice revs to reduce the amount of unrelated data to be read from disk.

75

76

``revs`` is sliced into groups that should be read in one time.

76

``revs`` is sliced into groups that should be read in one time.

77

Assume that revs are sorted.

77

Assume that revs are sorted.

78

79

The initial chunk is sliced until the overall density (payload/chunks-span

79

The initial chunk is sliced until the overall density (payload/chunks-span

80

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

80

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

81

`revlog._srmingapsize` is skipped.

81

`revlog._srmingapsize` is skipped.

82

83

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

83

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

84

For consistency with other slicing choice, this limit won't go lower than

84

For consistency with other slicing choice, this limit won't go lower than

85

`revlog._srmingapsize`.

85

`revlog._srmingapsize`.

86

87

If individual revisions chunk are larger than this limit, they will still

87

If individual revisions chunk are larger than this limit, they will still

88

be raised individually.

88

be raised individually.

89

90

>>> data = [

90

>>> data = [

91

... 5, #00 (5)

91

... 5, #00 (5)

92

... 10, #01 (5)

92

... 10, #01 (5)

93

... 12, #02 (2)

93

... 12, #02 (2)

94

... 12, #03 (empty)

94

... 12, #03 (empty)

95

... 27, #04 (15)

95

... 27, #04 (15)

96

... 31, #05 (4)

96

... 31, #05 (4)

97

... 31, #06 (empty)

97

... 31, #06 (empty)

98

... 42, #07 (11)

98

... 42, #07 (11)

99

... 47, #08 (5)

99

... 47, #08 (5)

100

... 47, #09 (empty)

100

... 47, #09 (empty)

101

... 48, #10 (1)

101

... 48, #10 (1)

102

... 51, #11 (3)

102

... 51, #11 (3)

103

... 74, #12 (23)

103

... 74, #12 (23)

104

... 85, #13 (11)

104

... 85, #13 (11)

105

... 86, #14 (1)

105

... 86, #14 (1)

106

... 91, #15 (5)

106

... 91, #15 (5)

107

... ]

107

... ]

108

>>> revlog = _testrevlog(data, snapshot=range(16))

108

>>> revlog = _testrevlog(data, snapshot=range(16))

109

110

>>> list(slicechunk(revlog, list(range(16))))

110

>>> list(slicechunk(revlog, list(range(16))))

111

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

111

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

112

>>> list(slicechunk(revlog, [0, 15]))

112

>>> list(slicechunk(revlog, [0, 15]))

113

[[0], [15]]

113

[[0], [15]]

114

>>> list(slicechunk(revlog, [0, 11, 15]))

114

>>> list(slicechunk(revlog, [0, 11, 15]))

115

[[0], [11], [15]]

115

[[0], [11], [15]]

116

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

116

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

117

[[0], [11, 13, 15]]

117

[[0], [11, 13, 15]]

118

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

118

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

119

[[1, 2], [5, 8, 10, 11], [14]]

119

[[1, 2], [5, 8, 10, 11], [14]]

120

121

Slicing with a maximum chunk size

121

Slicing with a maximum chunk size

122

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

122

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

123

[[0], [11], [13], [15]]

123

[[0], [11], [13], [15]]

124

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

124

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

125

[[0], [11], [13, 15]]

125

[[0], [11], [13, 15]]

126

127

Slicing involving nullrev

127

Slicing involving nullrev

128

>>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))

128

>>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))

129

[[-1, 0], [11], [13, 15]]

129

[[-1, 0], [11], [13, 15]]

130

>>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))

130

>>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))

131

[[-1], [13], [15]]

131

[[-1], [13], [15]]

132

"""

132

"""

133

if targetsize is not None:

133

if targetsize is not None:

134

targetsize = max(targetsize, revlog._srmingapsize)

134

targetsize = max(targetsize, revlog._srmingapsize)

135

# targetsize should not be specified when evaluating delta candidates:

135

# targetsize should not be specified when evaluating delta candidates:

136

# * targetsize is used to ensure we stay within specification when reading,

136

# * targetsize is used to ensure we stay within specification when reading,

137

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

137

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

138

if densityslicing is None:

138

if densityslicing is None:

139

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

139

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

140

for chunk in densityslicing(revs,

140

for chunk in densityslicing(revs,

141

revlog._srdensitythreshold,

141

revlog._srdensitythreshold,

142

revlog._srmingapsize):

142

revlog._srmingapsize):

143

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

143

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

144

yield subchunk

144

yield subchunk

145

146

def _slicechunktosize(revlog, revs, targetsize=None):

146

def _slicechunktosize(revlog, revs, targetsize=None):

147

"""slice revs to match the target size

147

"""slice revs to match the target size

148

149

This is intended to be used on chunk that density slicing selected by that

149

This is intended to be used on chunk that density slicing selected by that

150

are still too large compared to the read garantee of revlog. This might

150

are still too large compared to the read garantee of revlog. This might

151

happens when "minimal gap size" interrupted the slicing or when chain are

151

happens when "minimal gap size" interrupted the slicing or when chain are

152

built in a way that create large blocks next to each other.

152

built in a way that create large blocks next to each other.

153

154

>>> data = [

154

>>> data = [

155

... 3, #0 (3)

155

... 3, #0 (3)

156

... 5, #1 (2)

156

... 5, #1 (2)

157

... 6, #2 (1)

157

... 6, #2 (1)

158

... 8, #3 (2)

158

... 8, #3 (2)

159

... 8, #4 (empty)

159

... 8, #4 (empty)

160

... 11, #5 (3)

160

... 11, #5 (3)

161

... 12, #6 (1)

161

... 12, #6 (1)

162

... 13, #7 (1)

162

... 13, #7 (1)

163

... 14, #8 (1)

163

... 14, #8 (1)

164

... ]

164

... ]

165

166

== All snapshots cases ==

166

== All snapshots cases ==

167

>>> revlog = _testrevlog(data, snapshot=range(9))

167

>>> revlog = _testrevlog(data, snapshot=range(9))

168

169

Cases where chunk is already small enough

169

Cases where chunk is already small enough

170

>>> list(_slicechunktosize(revlog, [0], 3))

170

>>> list(_slicechunktosize(revlog, [0], 3))

171

[[0]]

171

[[0]]

172

>>> list(_slicechunktosize(revlog, [6, 7], 3))

172

>>> list(_slicechunktosize(revlog, [6, 7], 3))

173

[[6, 7]]

173

[[6, 7]]

174

>>> list(_slicechunktosize(revlog, [0], None))

174

>>> list(_slicechunktosize(revlog, [0], None))

175

[[0]]

175

[[0]]

176

>>> list(_slicechunktosize(revlog, [6, 7], None))

176

>>> list(_slicechunktosize(revlog, [6, 7], None))

177

[[6, 7]]

177

[[6, 7]]

178

179

cases where we need actual slicing

179

cases where we need actual slicing

180

>>> list(_slicechunktosize(revlog, [0, 1], 3))

180

>>> list(_slicechunktosize(revlog, [0, 1], 3))

181

[[0], [1]]

181

[[0], [1]]

182

>>> list(_slicechunktosize(revlog, [1, 3], 3))

182

>>> list(_slicechunktosize(revlog, [1, 3], 3))

183

[[1], [3]]

183

[[1], [3]]

184

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

184

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

185

[[1, 2], [3]]

185

[[1, 2], [3]]

186

>>> list(_slicechunktosize(revlog, [3, 5], 3))

186

>>> list(_slicechunktosize(revlog, [3, 5], 3))

187

[[3], [5]]

187

[[3], [5]]

188

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

188

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

189

[[3], [5]]

189

[[3], [5]]

190

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

190

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

191

[[5], [6, 7, 8]]

191

[[5], [6, 7, 8]]

192

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

192

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

193

[[0], [1, 2], [3], [5], [6, 7, 8]]

193

[[0], [1, 2], [3], [5], [6, 7, 8]]

194

195

Case with too large individual chunk (must return valid chunk)

195

Case with too large individual chunk (must return valid chunk)

196

>>> list(_slicechunktosize(revlog, [0, 1], 2))

196

>>> list(_slicechunktosize(revlog, [0, 1], 2))

197

[[0], [1]]

197

[[0], [1]]

198

>>> list(_slicechunktosize(revlog, [1, 3], 1))

198

>>> list(_slicechunktosize(revlog, [1, 3], 1))

199

[[1], [3]]

199

[[1], [3]]

200

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

200

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

201

[[3], [5]]

201

[[3], [5]]

202

203

== No Snapshot cases ==

203

== No Snapshot cases ==

204

>>> revlog = _testrevlog(data)

204

>>> revlog = _testrevlog(data)

205

206

Cases where chunk is already small enough

206

Cases where chunk is already small enough

207

>>> list(_slicechunktosize(revlog, [0], 3))

207

>>> list(_slicechunktosize(revlog, [0], 3))

208

[[0]]

208

[[0]]

209

>>> list(_slicechunktosize(revlog, [6, 7], 3))

209

>>> list(_slicechunktosize(revlog, [6, 7], 3))

210

[[6, 7]]

210

[[6, 7]]

211

>>> list(_slicechunktosize(revlog, [0], None))

211

>>> list(_slicechunktosize(revlog, [0], None))

212

[[0]]

212

[[0]]

213

>>> list(_slicechunktosize(revlog, [6, 7], None))

213

>>> list(_slicechunktosize(revlog, [6, 7], None))

214

[[6, 7]]

214

[[6, 7]]

215

216

cases where we need actual slicing

216

cases where we need actual slicing

217

>>> list(_slicechunktosize(revlog, [0, 1], 3))

217

>>> list(_slicechunktosize(revlog, [0, 1], 3))

218

[[0], [1]]

218

[[0], [1]]

219

>>> list(_slicechunktosize(revlog, [1, 3], 3))

219

>>> list(_slicechunktosize(revlog, [1, 3], 3))

220

[[1], [3]]

220

[[1], [3]]

221

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

221

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

222

[[1], [2, 3]]

222

[[1], [2, 3]]

223

>>> list(_slicechunktosize(revlog, [3, 5], 3))

223

>>> list(_slicechunktosize(revlog, [3, 5], 3))

224

[[3], [5]]

224

[[3], [5]]

225

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

225

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

226

[[3], [4, 5]]

226

[[3], [4, 5]]

227

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

227

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

228

[[5], [6, 7, 8]]

228

[[5], [6, 7, 8]]

229

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

229

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

230

[[0], [1, 2], [3], [5], [6, 7, 8]]

230

[[0], [1, 2], [3], [5], [6, 7, 8]]

231

232

Case with too large individual chunk (must return valid chunk)

232

Case with too large individual chunk (must return valid chunk)

233

>>> list(_slicechunktosize(revlog, [0, 1], 2))

233

>>> list(_slicechunktosize(revlog, [0, 1], 2))

234

[[0], [1]]

234

[[0], [1]]

235

>>> list(_slicechunktosize(revlog, [1, 3], 1))

235

>>> list(_slicechunktosize(revlog, [1, 3], 1))

236

[[1], [3]]

236

[[1], [3]]

237

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

237

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

238

[[3], [5]]

238

[[3], [5]]

239

240

== mixed case ==

240

== mixed case ==

241

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

241

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

242

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

242

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

243

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

243

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

244

"""

244

"""

245

assert targetsize is None or 0 <= targetsize

245

assert targetsize is None or 0 <= targetsize

246

startdata = revlog.start(revs[0])

246

startdata = revlog.start(revs[0])

247

enddata = revlog.end(revs[-1])

247

enddata = revlog.end(revs[-1])

248

fullspan = enddata - startdata

248

fullspan = enddata - startdata

249

if targetsize is None or fullspan <= targetsize:

249

if targetsize is None or fullspan <= targetsize:

250

yield revs

250

yield revs

251

return

251

return

252

253

startrevidx = 0

253

startrevidx = 0

254

endrevidx = 1

254

endrevidx = 1

255

iterrevs = enumerate(revs)

255

iterrevs = enumerate(revs)

256

next(iterrevs) # skip first rev.

256

next(iterrevs) # skip first rev.

257

# first step: get snapshots out of the way

257

# first step: get snapshots out of the way

258

for idx, r in iterrevs:

258

for idx, r in iterrevs:

259

span = revlog.end(r) - startdata

259

span = revlog.end(r) - startdata

260

snapshot = revlog.issnapshot(r)

260

snapshot = revlog.issnapshot(r)

261

if span <= targetsize and snapshot:

261

if span <= targetsize and snapshot:

262

endrevidx = idx + 1

262

endrevidx = idx + 1

263

else:

263

else:

264

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

264

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

265

if chunk:

265

if chunk:

266

yield chunk

266

yield chunk

267

startrevidx = idx

267

startrevidx = idx

268

startdata = revlog.start(r)

268

startdata = revlog.start(r)

269

endrevidx = idx + 1

269

endrevidx = idx + 1

270

if not snapshot:

270

if not snapshot:

271

break

271

break

272

273

# for the others, we use binary slicing to quickly converge toward valid

273

# for the others, we use binary slicing to quickly converge toward valid

274

# chunks (otherwise, we might end up looking for start/end of many

274

# chunks (otherwise, we might end up looking for start/end of many

275

# revisions). This logic is not looking for the perfect slicing point, it

275

# revisions). This logic is not looking for the perfect slicing point, it

276

# focuses on quickly converging toward valid chunks.

276

# focuses on quickly converging toward valid chunks.

277

nbitem = len(revs)

277

nbitem = len(revs)

278

while (enddata - startdata) > targetsize:

278

while (enddata - startdata) > targetsize:

279

endrevidx = nbitem

279

endrevidx = nbitem

280

if nbitem - startrevidx <= 1:

280

if nbitem - startrevidx <= 1:

281

break # protect against individual chunk larger than limit

281

break # protect against individual chunk larger than limit

282

localenddata = revlog.end(revs[endrevidx - 1])

282

localenddata = revlog.end(revs[endrevidx - 1])

283

span = localenddata - startdata

283

span = localenddata - startdata

284

while span > targetsize:

284

while span > targetsize:

285

if endrevidx - startrevidx <= 1:

285

if endrevidx - startrevidx <= 1:

286

break # protect against individual chunk larger than limit

286

break # protect against individual chunk larger than limit

287

endrevidx -= (endrevidx - startrevidx) // 2

287

endrevidx -= (endrevidx - startrevidx) // 2

288

localenddata = revlog.end(revs[endrevidx - 1])

288

localenddata = revlog.end(revs[endrevidx - 1])

289

span = localenddata - startdata

289

span = localenddata - startdata

290

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

290

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

291

if chunk:

291

if chunk:

292

yield chunk

292

yield chunk

293

startrevidx = endrevidx

293

startrevidx = endrevidx

294

startdata = revlog.start(revs[startrevidx])

294

startdata = revlog.start(revs[startrevidx])

295

296

chunk = _trimchunk(revlog, revs, startrevidx)

296

chunk = _trimchunk(revlog, revs, startrevidx)

297

if chunk:

297

if chunk:

298

yield chunk

298

yield chunk

299

300

def _slicechunktodensity(revlog, revs, targetdensity=0.5,

300

def _slicechunktodensity(revlog, revs, targetdensity=0.5,

301

mingapsize=0):

301

mingapsize=0):

302

"""slice revs to reduce the amount of unrelated data to be read from disk.

302

"""slice revs to reduce the amount of unrelated data to be read from disk.

303

304

``revs`` is sliced into groups that should be read in one time.

304

``revs`` is sliced into groups that should be read in one time.

305

Assume that revs are sorted.

305

Assume that revs are sorted.

306

307

The initial chunk is sliced until the overall density (payload/chunks-span

307

The initial chunk is sliced until the overall density (payload/chunks-span

308

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

308

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

309

skipped.

309

skipped.

310

311

>>> revlog = _testrevlog([

311

>>> revlog = _testrevlog([

312

... 5, #00 (5)

312

... 5, #00 (5)

313

... 10, #01 (5)

313

... 10, #01 (5)

314

... 12, #02 (2)

314

... 12, #02 (2)

315

... 12, #03 (empty)

315

... 12, #03 (empty)

316

... 27, #04 (15)

316

... 27, #04 (15)

317

... 31, #05 (4)

317

... 31, #05 (4)

318

... 31, #06 (empty)

318

... 31, #06 (empty)

319

... 42, #07 (11)

319

... 42, #07 (11)

320

... 47, #08 (5)

320

... 47, #08 (5)

321

... 47, #09 (empty)

321

... 47, #09 (empty)

322

... 48, #10 (1)

322

... 48, #10 (1)

323

... 51, #11 (3)

323

... 51, #11 (3)

324

... 74, #12 (23)

324

... 74, #12 (23)

325

... 85, #13 (11)

325

... 85, #13 (11)

326

... 86, #14 (1)

326

... 86, #14 (1)

327

... 91, #15 (5)

327

... 91, #15 (5)

328

... ])

328

... ])

329

330

>>> list(_slicechunktodensity(revlog, list(range(16))))

330

>>> list(_slicechunktodensity(revlog, list(range(16))))

331

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

331

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

332

>>> list(_slicechunktodensity(revlog, [0, 15]))

332

>>> list(_slicechunktodensity(revlog, [0, 15]))

333

[[0], [15]]

333

[[0], [15]]

334

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

334

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

335

[[0], [11], [15]]

335

[[0], [11], [15]]

336

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

336

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

337

[[0], [11, 13, 15]]

337

[[0], [11, 13, 15]]

338

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

338

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

339

[[1, 2], [5, 8, 10, 11], [14]]

339

[[1, 2], [5, 8, 10, 11], [14]]

340

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

340

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

341

... mingapsize=20))

341

... mingapsize=20))

342

[[1, 2, 3, 5, 8, 10, 11], [14]]

342

[[1, 2, 3, 5, 8, 10, 11], [14]]

343

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

343

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

344

... targetdensity=0.95))

344

... targetdensity=0.95))

345

[[1, 2], [5], [8, 10, 11], [14]]

345

[[1, 2], [5], [8, 10, 11], [14]]

346

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

346

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

347

... targetdensity=0.95, mingapsize=12))

347

... targetdensity=0.95, mingapsize=12))

348

[[1, 2], [5, 8, 10, 11], [14]]

348

[[1, 2], [5, 8, 10, 11], [14]]

349

"""

349

"""

350

start = revlog.start

350

start = revlog.start

351

length = revlog.length

351

length = revlog.length

352

353

if len(revs) <= 1:

353

if len(revs) <= 1:

354

yield revs

354

yield revs

355

return

355

return

356

357

deltachainspan = segmentspan(revlog, revs)

357

deltachainspan = segmentspan(revlog, revs)

358

359

if deltachainspan < mingapsize:

359

if deltachainspan < mingapsize:

360

yield revs

360

yield revs

361

return

361

return

362

363

readdata = deltachainspan

363

readdata = deltachainspan

364

chainpayload = sum(length(r) for r in revs)

364

chainpayload = sum(length(r) for r in revs)

365

366

if deltachainspan:

366

if deltachainspan:

367

density = chainpayload / float(deltachainspan)

367

density = chainpayload / float(deltachainspan)

368

else:

368

else:

369

density = 1.0

369

density = 1.0

370

371

if density >= targetdensity:

371

if density >= targetdensity:

372

yield revs

372

yield revs

373

return

373

return

374

375

# Store the gaps in a heap to have them sorted by decreasing size

375

# Store the gaps in a heap to have them sorted by decreasing size

376

gaps = []

376

gaps = []

377

prevend = None

377

prevend = None

378

for i, rev in enumerate(revs):

378

for i, rev in enumerate(revs):

379

revstart = start(rev)

379

revstart = start(rev)

380

revlen = length(rev)

380

revlen = length(rev)

381

382

# Skip empty revisions to form larger holes

382

# Skip empty revisions to form larger holes

383

if revlen == 0:

383

if revlen == 0:

384

continue

384

continue

385

386

if prevend is not None:

386

if prevend is not None:

387

gapsize = revstart - prevend

387

gapsize = revstart - prevend

388

# only consider holes that are large enough

388

# only consider holes that are large enough

389

if gapsize > mingapsize:

389

if gapsize > mingapsize:

390

gaps.append((gapsize, i))

390

gaps.append((gapsize, i))

391

392

prevend = revstart + revlen

392

prevend = revstart + revlen

393

# sort the gaps to pop them from largest to small

393

# sort the gaps to pop them from largest to small

394

gaps.sort()

394

gaps.sort()

395

396

# Collect the indices of the largest holes until the density is acceptable

396

# Collect the indices of the largest holes until the density is acceptable

397

selected = []

397

selected = []

398

while gaps and density < targetdensity:

398

while gaps and density < targetdensity:

399

gapsize, gapidx = gaps.pop()

399

gapsize, gapidx = gaps.pop()

400

401

selected.append(gapidx)

401

selected.append(gapidx)

402

403

# the gap sizes are stored as negatives to be sorted decreasingly

403

# the gap sizes are stored as negatives to be sorted decreasingly

404

# by the heap

404

# by the heap

405

readdata -= gapsize

405

readdata -= gapsize

406

if readdata > 0:

406

if readdata > 0:

407

density = chainpayload / float(readdata)

407

density = chainpayload / float(readdata)

408

else:

408

else:

409

density = 1.0

409

density = 1.0

410

selected.sort()

410

selected.sort()

411

412

# Cut the revs at collected indices

412

# Cut the revs at collected indices

413

previdx = 0

413

previdx = 0

414

for idx in selected:

414

for idx in selected:

415

416

chunk = _trimchunk(revlog, revs, previdx, idx)

416

chunk = _trimchunk(revlog, revs, previdx, idx)

417

if chunk:

417

if chunk:

418

yield chunk

418

yield chunk

419

420

previdx = idx

420

previdx = idx

421

422

chunk = _trimchunk(revlog, revs, previdx)

422

chunk = _trimchunk(revlog, revs, previdx)

423

if chunk:

423

if chunk:

424

yield chunk

424

yield chunk

425

426

def _trimchunk(revlog, revs, startidx, endidx=None):

426

def _trimchunk(revlog, revs, startidx, endidx=None):

427

"""returns revs[startidx:endidx] without empty trailing revs

427

"""returns revs[startidx:endidx] without empty trailing revs

428

429

Doctest Setup

429

Doctest Setup

430

>>> revlog = _testrevlog([

430

>>> revlog = _testrevlog([

431

... 5, #0

431

... 5, #0

432

... 10, #1

432

... 10, #1

433

... 12, #2

433

... 12, #2

434

... 12, #3 (empty)

434

... 12, #3 (empty)

435

... 17, #4

435

... 17, #4

436

... 21, #5

436

... 21, #5

437

... 21, #6 (empty)

437

... 21, #6 (empty)

438

... ])

438

... ])

439

440

Contiguous cases:

440

Contiguous cases:

441

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

441

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

442

[0, 1, 2, 3, 4, 5]

442

[0, 1, 2, 3, 4, 5]

443

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

443

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

444

[0, 1, 2, 3, 4]

444

[0, 1, 2, 3, 4]

445

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

445

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

446

[0, 1, 2]

446

[0, 1, 2]

447

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

447

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

448

[2]

448

[2]

449

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

449

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

450

[3, 4, 5]

450

[3, 4, 5]

451

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

451

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

452

[3, 4]

452

[3, 4]

453

454

Discontiguous cases:

454

Discontiguous cases:

455

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

455

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

456

[1, 3, 5]

456

[1, 3, 5]

457

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

457

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

458

[1]

458

[1]

459

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

459

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

460

[3, 5]

460

[3, 5]

461

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

461

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

462

[3, 5]

462

[3, 5]

463

"""

463

"""

464

length = revlog.length

464

length = revlog.length

465

466

if endidx is None:

466

if endidx is None:

467

endidx = len(revs)

467

endidx = len(revs)

468

469

# If we have a non-emtpy delta candidate, there are nothing to trim

469

# If we have a non-emtpy delta candidate, there are nothing to trim

470

if revs[endidx - 1] < len(revlog):

470

if revs[endidx - 1] < len(revlog):

471

# Trim empty revs at the end, except the very first revision of a chain

471

# Trim empty revs at the end, except the very first revision of a chain

472

while (endidx > 1

472

while (endidx > 1

473

and endidx > startidx

473

and endidx > startidx

474

and length(revs[endidx - 1]) == 0):

474

and length(revs[endidx - 1]) == 0):

475

endidx -= 1

475

endidx -= 1

476

477

return revs[startidx:endidx]

477

return revs[startidx:endidx]

478

479

def segmentspan(revlog, revs):

479

def segmentspan(revlog, revs):

480

"""Get the byte span of a segment of revisions

480

"""Get the byte span of a segment of revisions

481

482

revs is a sorted array of revision numbers

482

revs is a sorted array of revision numbers

483

484

>>> revlog = _testrevlog([

484

>>> revlog = _testrevlog([

485

... 5, #0

485

... 5, #0

486

... 10, #1

486

... 10, #1

487

... 12, #2

487

... 12, #2

488

... 12, #3 (empty)

488

... 12, #3 (empty)

489

... 17, #4

489

... 17, #4

490

... ])

490

... ])

491

492

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

492

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

493

17

493

17

494

>>> segmentspan(revlog, [0, 4])

494

>>> segmentspan(revlog, [0, 4])

495

17

495

17

496

>>> segmentspan(revlog, [3, 4])

496

>>> segmentspan(revlog, [3, 4])

497

5

497

5

498

>>> segmentspan(revlog, [1, 2, 3,])

498

>>> segmentspan(revlog, [1, 2, 3,])

499

7

499

7

500

>>> segmentspan(revlog, [1, 3])

500

>>> segmentspan(revlog, [1, 3])

501

7

501

7

502

"""

502

"""

503

if not revs:

503

if not revs:

504

return 0

504

return 0

505

end = revlog.end(revs[-1])

505

end = revlog.end(revs[-1])

506

return end - revlog.start(revs[0])

506

return end - revlog.start(revs[0])

507

508

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

508

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

509

"""build full text from a (base, delta) pair and other metadata"""

509

"""build full text from a (base, delta) pair and other metadata"""

510

# special case deltas which replace entire base; no need to decode

510

# special case deltas which replace entire base; no need to decode

511

# base revision. this neatly avoids censored bases, which throw when

511

# base revision. this neatly avoids censored bases, which throw when

512

# they're decoded.

512

# they're decoded.

513

hlen = struct.calcsize(">lll")

513

hlen = struct.calcsize(">lll")

514

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

514

if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),

515

len(delta) - hlen):

515

len(delta) - hlen):

516

fulltext = delta[hlen:]

516

fulltext = delta[hlen:]

517

else:

517

else:

518

# deltabase is rawtext before changed by flag processors, which is

518

# deltabase is rawtext before changed by flag processors, which is

519

# equivalent to non-raw text

519

# equivalent to non-raw text

520

basetext = revlog.revision(baserev, _df=fh, raw=False)

520

basetext = revlog.revision(baserev, _df=fh, raw=False)

521

fulltext = mdiff.patch(basetext, delta)

521

fulltext = mdiff.patch(basetext, delta)

522

523

try:

523

try:

524

~~res~~ = revlog._processflags(fulltext, flags, ~~'read'~~, ~~raw~~=~~True~~)

524

validatehash = revlog._processflagsraw(fulltext, flags)

525

fulltext, validatehash = res

526

if validatehash:

525

if validatehash:

527

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

526

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

528

if flags & REVIDX_ISCENSORED:

527

if flags & REVIDX_ISCENSORED:

529

raise error.StorageError(_('node %s is not censored') %

528

raise error.StorageError(_('node %s is not censored') %

530

expectednode)

529

expectednode)

531

except error.CensoredNodeError:

530

except error.CensoredNodeError:

532

# must pass the censored index flag to add censored revisions

531

# must pass the censored index flag to add censored revisions

533

if not flags & REVIDX_ISCENSORED:

532

if not flags & REVIDX_ISCENSORED:

534

raise

533

raise

535

return fulltext

534

return fulltext

536

535

537

@attr.s(slots=True, frozen=True)

536

@attr.s(slots=True, frozen=True)

538

class _deltainfo(object):

537

class _deltainfo(object):

539

distance = attr.ib()

538

distance = attr.ib()

540

deltalen = attr.ib()

539

deltalen = attr.ib()

541

data = attr.ib()

540

data = attr.ib()

542

base = attr.ib()

541

base = attr.ib()

543

chainbase = attr.ib()

542

chainbase = attr.ib()

544

chainlen = attr.ib()

543

chainlen = attr.ib()

545

compresseddeltalen = attr.ib()

544

compresseddeltalen = attr.ib()

546

snapshotdepth = attr.ib()

545

snapshotdepth = attr.ib()

547

546

548

def isgooddeltainfo(revlog, deltainfo, revinfo):

547

def isgooddeltainfo(revlog, deltainfo, revinfo):

549

"""Returns True if the given delta is good. Good means that it is within

548

"""Returns True if the given delta is good. Good means that it is within

550

the disk span, disk size, and chain length bounds that we know to be

549

the disk span, disk size, and chain length bounds that we know to be

551

performant."""

550

performant."""

552

if deltainfo is None:

551

if deltainfo is None:

553

return False

552

return False

554

553

555

# - 'deltainfo.distance' is the distance from the base revision --

554

# - 'deltainfo.distance' is the distance from the base revision --

556

# bounding it limits the amount of I/O we need to do.

555

# bounding it limits the amount of I/O we need to do.

557

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

556

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

558

# deltas we need to apply -- bounding it limits the amount of CPU

557

# deltas we need to apply -- bounding it limits the amount of CPU

559

# we consume.

558

# we consume.

560

559

561

textlen = revinfo.textlen

560

textlen = revinfo.textlen

562

defaultmax = textlen * 4

561

defaultmax = textlen * 4

563

maxdist = revlog._maxdeltachainspan

562

maxdist = revlog._maxdeltachainspan

564

if not maxdist:

563

if not maxdist:

565

maxdist = deltainfo.distance # ensure the conditional pass

564

maxdist = deltainfo.distance # ensure the conditional pass

566

maxdist = max(maxdist, defaultmax)

565

maxdist = max(maxdist, defaultmax)

567

566

568

# Bad delta from read span:

567

# Bad delta from read span:

569

#

568

#

570

# If the span of data read is larger than the maximum allowed.

569

# If the span of data read is larger than the maximum allowed.

571

#

570

#

572

# In the sparse-revlog case, we rely on the associated "sparse reading"

571

# In the sparse-revlog case, we rely on the associated "sparse reading"

573

# to avoid issue related to the span of data. In theory, it would be

572

# to avoid issue related to the span of data. In theory, it would be

574

# possible to build pathological revlog where delta pattern would lead

573

# possible to build pathological revlog where delta pattern would lead

575

# to too many reads. However, they do not happen in practice at all. So

574

# to too many reads. However, they do not happen in practice at all. So

576

# we skip the span check entirely.

575

# we skip the span check entirely.

577

if not revlog._sparserevlog and maxdist < deltainfo.distance:

576

if not revlog._sparserevlog and maxdist < deltainfo.distance:

578

return False

577

return False

579

578

580

# Bad delta from new delta size:

579

# Bad delta from new delta size:

581

#

580

#

582

# If the delta size is larger than the target text, storing the

581

# If the delta size is larger than the target text, storing the

583

# delta will be inefficient.

582

# delta will be inefficient.

584

if textlen < deltainfo.deltalen:

583

if textlen < deltainfo.deltalen:

585

return False

584

return False

586

585

587

# Bad delta from cumulated payload size:

586

# Bad delta from cumulated payload size:

588

#

587

#

589

# If the sum of delta get larger than K * target text length.

588

# If the sum of delta get larger than K * target text length.

590

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

589

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

591

return False

590

return False

592

591

593

# Bad delta from chain length:

592

# Bad delta from chain length:

594

#

593

#

595

# If the number of delta in the chain gets too high.

594

# If the number of delta in the chain gets too high.

596

if (revlog._maxchainlen

595

if (revlog._maxchainlen

597

and revlog._maxchainlen < deltainfo.chainlen):

596

and revlog._maxchainlen < deltainfo.chainlen):

598

return False

597

return False

599

598

600

# bad delta from intermediate snapshot size limit

599

# bad delta from intermediate snapshot size limit

601

#

600

#

602

# If an intermediate snapshot size is higher than the limit. The

601

# If an intermediate snapshot size is higher than the limit. The

603

# limit exist to prevent endless chain of intermediate delta to be

602

# limit exist to prevent endless chain of intermediate delta to be

604

# created.

603

# created.

605

if (deltainfo.snapshotdepth is not None and

604

if (deltainfo.snapshotdepth is not None and

606

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

605

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

607

return False

606

return False

608

607

609

# bad delta if new intermediate snapshot is larger than the previous

608

# bad delta if new intermediate snapshot is larger than the previous

610

# snapshot

609

# snapshot

611

if (deltainfo.snapshotdepth

610

if (deltainfo.snapshotdepth

612

and revlog.length(deltainfo.base) < deltainfo.deltalen):

611

and revlog.length(deltainfo.base) < deltainfo.deltalen):

613

return False

612

return False

614

613

615

return True

614

return True

616

615

617

# If a revision's full text is that much bigger than a base candidate full

616

# If a revision's full text is that much bigger than a base candidate full

618

# text's, it is very unlikely that it will produce a valid delta. We no longer

617

# text's, it is very unlikely that it will produce a valid delta. We no longer

619

# consider these candidates.

618

# consider these candidates.

620

LIMIT_BASE2TEXT = 500

619

LIMIT_BASE2TEXT = 500

621

620

622

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

621

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

623

"""Provides group of revision to be tested as delta base

622

"""Provides group of revision to be tested as delta base

624

623

625

This top level function focus on emitting groups with unique and worthwhile

624

This top level function focus on emitting groups with unique and worthwhile

626

content. See _raw_candidate_groups for details about the group order.

625

content. See _raw_candidate_groups for details about the group order.

627

"""

626

"""

628

# should we try to build a delta?

627

# should we try to build a delta?

629

if not (len(revlog) and revlog._storedeltachains):

628

if not (len(revlog) and revlog._storedeltachains):

630

yield None

629

yield None

631

return

630

return

632

631

633

deltalength = revlog.length

632

deltalength = revlog.length

634

deltaparent = revlog.deltaparent

633

deltaparent = revlog.deltaparent

635

sparse = revlog._sparserevlog

634

sparse = revlog._sparserevlog

636

good = None

635

good = None

637

636

638

deltas_limit = textlen * LIMIT_DELTA2TEXT

637

deltas_limit = textlen * LIMIT_DELTA2TEXT

639

638

640

tested = {nullrev}

639

tested = {nullrev}

641

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

640

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

642

while True:

641

while True:

643

temptative = candidates.send(good)

642

temptative = candidates.send(good)

644

if temptative is None:

643

if temptative is None:

645

break

644

break

646

group = []

645

group = []

647

for rev in temptative:

646

for rev in temptative:

648

# skip over empty delta (no need to include them in a chain)

647

# skip over empty delta (no need to include them in a chain)

649

while (revlog._generaldelta

648

while (revlog._generaldelta

650

and not (rev == nullrev

649

and not (rev == nullrev

651

or rev in tested

650

or rev in tested

652

or deltalength(rev))):

651

or deltalength(rev))):

653

tested.add(rev)

652

tested.add(rev)

654

rev = deltaparent(rev)

653

rev = deltaparent(rev)

655

# no need to try a delta against nullrev, this will be done as a

654

# no need to try a delta against nullrev, this will be done as a

656

# last resort.

655

# last resort.

657

if rev == nullrev:

656

if rev == nullrev:

658

continue

657

continue

659

# filter out revision we tested already

658

# filter out revision we tested already

660

if rev in tested:

659

if rev in tested:

661

continue

660

continue

662

tested.add(rev)

661

tested.add(rev)

663

# filter out delta base that will never produce good delta

662

# filter out delta base that will never produce good delta

664

if deltas_limit < revlog.length(rev):

663

if deltas_limit < revlog.length(rev):

665

continue

664

continue

666

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

665

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

667

continue

666

continue

668

# no delta for rawtext-changing revs (see "candelta" for why)

667

# no delta for rawtext-changing revs (see "candelta" for why)

669

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

668

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

670

continue

669

continue

671

# If we reach here, we are about to build and test a delta.

670

# If we reach here, we are about to build and test a delta.

672

# The delta building process will compute the chaininfo in all

671

# The delta building process will compute the chaininfo in all

673

# case, since that computation is cached, it is fine to access it

672

# case, since that computation is cached, it is fine to access it

674

# here too.

673

# here too.

675

chainlen, chainsize = revlog._chaininfo(rev)

674

chainlen, chainsize = revlog._chaininfo(rev)

676

# if chain will be too long, skip base

675

# if chain will be too long, skip base

677

if revlog._maxchainlen and chainlen >= revlog._maxchainlen:

676

if revlog._maxchainlen and chainlen >= revlog._maxchainlen:

678

continue

677

continue

679

# if chain already have too much data, skip base

678

# if chain already have too much data, skip base

680

if deltas_limit < chainsize:

679

if deltas_limit < chainsize:

681

continue

680

continue

682

if sparse and revlog.upperboundcomp is not None:

681

if sparse and revlog.upperboundcomp is not None:

683

maxcomp = revlog.upperboundcomp

682

maxcomp = revlog.upperboundcomp

684

basenotsnap = (p1, p2, nullrev)

683

basenotsnap = (p1, p2, nullrev)

685

if rev not in basenotsnap and revlog.issnapshot(rev):

684

if rev not in basenotsnap and revlog.issnapshot(rev):

686

snapshotdepth = revlog.snapshotdepth(rev)

685

snapshotdepth = revlog.snapshotdepth(rev)

687

# If text is significantly larger than the base, we can

686

# If text is significantly larger than the base, we can

688

# expect the resulting delta to be proportional to the size

687

# expect the resulting delta to be proportional to the size

689

# difference

688

# difference

690

revsize = revlog.rawsize(rev)

689

revsize = revlog.rawsize(rev)

691

rawsizedistance = max(textlen - revsize, 0)

690

rawsizedistance = max(textlen - revsize, 0)

692

# use an estimate of the compression upper bound.

691

# use an estimate of the compression upper bound.

693

lowestrealisticdeltalen = rawsizedistance // maxcomp

692

lowestrealisticdeltalen = rawsizedistance // maxcomp

694

693

695

# check the absolute constraint on the delta size

694

# check the absolute constraint on the delta size

696

snapshotlimit = textlen >> snapshotdepth

695

snapshotlimit = textlen >> snapshotdepth

697

if snapshotlimit < lowestrealisticdeltalen:

696

if snapshotlimit < lowestrealisticdeltalen:

698

# delta lower bound is larger than accepted upper bound

697

# delta lower bound is larger than accepted upper bound

699

continue

698

continue

700

699

701

# check the relative constraint on the delta size

700

# check the relative constraint on the delta size

702

revlength = revlog.length(rev)

701

revlength = revlog.length(rev)

703

if revlength < lowestrealisticdeltalen:

702

if revlength < lowestrealisticdeltalen:

704

# delta probable lower bound is larger than target base

703

# delta probable lower bound is larger than target base

705

continue

704

continue

706

705

707

group.append(rev)

706

group.append(rev)

708

if group:

707

if group:

709

# XXX: in the sparse revlog case, group can become large,

708

# XXX: in the sparse revlog case, group can become large,

710

# impacting performances. Some bounding or slicing mecanism

709

# impacting performances. Some bounding or slicing mecanism

711

# would help to reduce this impact.

710

# would help to reduce this impact.

712

good = yield tuple(group)

711

good = yield tuple(group)

713

yield None

712

yield None

714

713

715

def _findsnapshots(revlog, cache, start_rev):

714

def _findsnapshots(revlog, cache, start_rev):

716

"""find snapshot from start_rev to tip"""

715

"""find snapshot from start_rev to tip"""

717

if util.safehasattr(revlog.index, 'findsnapshots'):

716

if util.safehasattr(revlog.index, 'findsnapshots'):

718

revlog.index.findsnapshots(cache, start_rev)

717

revlog.index.findsnapshots(cache, start_rev)

719

else:

718

else:

720

deltaparent = revlog.deltaparent

719

deltaparent = revlog.deltaparent

721

issnapshot = revlog.issnapshot

720

issnapshot = revlog.issnapshot

722

for rev in revlog.revs(start_rev):

721

for rev in revlog.revs(start_rev):

723

if issnapshot(rev):

722

if issnapshot(rev):

724

cache[deltaparent(rev)].append(rev)

723

cache[deltaparent(rev)].append(rev)

725

724

726

def _refinedgroups(revlog, p1, p2, cachedelta):

725

def _refinedgroups(revlog, p1, p2, cachedelta):

727

good = None

726

good = None

728

# First we try to reuse a the delta contained in the bundle.

727

# First we try to reuse a the delta contained in the bundle.

729

# (or from the source revlog)

728

# (or from the source revlog)

730

#

729

#

731

# This logic only applies to general delta repositories and can be disabled

730

# This logic only applies to general delta repositories and can be disabled

732

# through configuration. Disabling reuse source delta is useful when

731

# through configuration. Disabling reuse source delta is useful when

733

# we want to make sure we recomputed "optimal" deltas.

732

# we want to make sure we recomputed "optimal" deltas.

734

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

733

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

735

# Assume what we received from the server is a good choice

734

# Assume what we received from the server is a good choice

736

# build delta will reuse the cache

735

# build delta will reuse the cache

737

good = yield (cachedelta[0],)

736

good = yield (cachedelta[0],)

738

if good is not None:

737

if good is not None:

739

yield None

738

yield None

740

return

739

return

741

snapshots = collections.defaultdict(list)

740

snapshots = collections.defaultdict(list)

742

for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):

741

for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):

743

good = yield candidates

742

good = yield candidates

744

if good is not None:

743

if good is not None:

745

break

744

break

746

745

747

# If sparse revlog is enabled, we can try to refine the available deltas

746

# If sparse revlog is enabled, we can try to refine the available deltas

748

if not revlog._sparserevlog:

747

if not revlog._sparserevlog:

749

yield None

748

yield None

750

return

749

return

751

750

752

# if we have a refinable value, try to refine it

751

# if we have a refinable value, try to refine it

753

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

752

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

754

# refine snapshot down

753

# refine snapshot down

755

previous = None

754

previous = None

756

while previous != good:

755

while previous != good:

757

previous = good

756

previous = good

758

base = revlog.deltaparent(good)

757

base = revlog.deltaparent(good)

759

if base == nullrev:

758

if base == nullrev:

760

break

759

break

761

good = yield (base,)

760

good = yield (base,)

762

# refine snapshot up

761

# refine snapshot up

763

if not snapshots:

762

if not snapshots:

764

_findsnapshots(revlog, snapshots, good + 1)

763

_findsnapshots(revlog, snapshots, good + 1)

765

previous = None

764

previous = None

766

while good != previous:

765

while good != previous:

767

previous = good

766

previous = good

768

children = tuple(sorted(c for c in snapshots[good]))

767

children = tuple(sorted(c for c in snapshots[good]))

769

good = yield children

768

good = yield children

770

769

771

# we have found nothing

770

# we have found nothing

772

yield None

771

yield None

773

772

774

def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):

773

def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):

775

"""Provides group of revision to be tested as delta base

774

"""Provides group of revision to be tested as delta base

776

775

777

This lower level function focus on emitting delta theorically interresting

776

This lower level function focus on emitting delta theorically interresting

778

without looking it any practical details.

777

without looking it any practical details.

779

778

780

The group order aims at providing fast or small candidates first.

779

The group order aims at providing fast or small candidates first.

781

"""

780

"""

782

gdelta = revlog._generaldelta

781

gdelta = revlog._generaldelta

783

# gate sparse behind general-delta because of issue6056

782

# gate sparse behind general-delta because of issue6056

784

sparse = gdelta and revlog._sparserevlog

783

sparse = gdelta and revlog._sparserevlog

785

curr = len(revlog)

784

curr = len(revlog)

786

prev = curr - 1

785

prev = curr - 1

787

deltachain = lambda rev: revlog._deltachain(rev)[0]

786

deltachain = lambda rev: revlog._deltachain(rev)[0]

788

787

789

if gdelta:

788

if gdelta:

790

# exclude already lazy tested base if any

789

# exclude already lazy tested base if any

791

parents = [p for p in (p1, p2) if p != nullrev]

790

parents = [p for p in (p1, p2) if p != nullrev]

792

791

793

if not revlog._deltabothparents and len(parents) == 2:

792

if not revlog._deltabothparents and len(parents) == 2:

794

parents.sort()

793

parents.sort()

795

# To minimize the chance of having to build a fulltext,

794

# To minimize the chance of having to build a fulltext,

796

# pick first whichever parent is closest to us (max rev)

795

# pick first whichever parent is closest to us (max rev)

797

yield (parents[1],)

796

yield (parents[1],)

798

# then the other one (min rev) if the first did not fit

797

# then the other one (min rev) if the first did not fit

799

yield (parents[0],)

798

yield (parents[0],)

800

elif len(parents) > 0:

799

elif len(parents) > 0:

801

# Test all parents (1 or 2), and keep the best candidate

800

# Test all parents (1 or 2), and keep the best candidate

802

yield parents

801

yield parents

803

802

804

if sparse and parents:

803

if sparse and parents:

805

if snapshots is None:

804

if snapshots is None:

806

# map: base-rev: snapshot-rev

805

# map: base-rev: snapshot-rev

807

snapshots = collections.defaultdict(list)

806

snapshots = collections.defaultdict(list)

808

# See if we can use an existing snapshot in the parent chains to use as

807

# See if we can use an existing snapshot in the parent chains to use as

809

# a base for a new intermediate-snapshot

808

# a base for a new intermediate-snapshot

810

#

809

#

811

# search for snapshot in parents delta chain

810

# search for snapshot in parents delta chain

812

# map: snapshot-level: snapshot-rev

811

# map: snapshot-level: snapshot-rev

813

parents_snaps = collections.defaultdict(set)

812

parents_snaps = collections.defaultdict(set)

814

candidate_chains = [deltachain(p) for p in parents]

813

candidate_chains = [deltachain(p) for p in parents]

815

for chain in candidate_chains:

814

for chain in candidate_chains:

816

for idx, s in enumerate(chain):

815

for idx, s in enumerate(chain):

817

if not revlog.issnapshot(s):

816

if not revlog.issnapshot(s):

818

break

817

break

819

parents_snaps[idx].add(s)

818

parents_snaps[idx].add(s)

820

snapfloor = min(parents_snaps[0]) + 1

819

snapfloor = min(parents_snaps[0]) + 1

821

_findsnapshots(revlog, snapshots, snapfloor)

820

_findsnapshots(revlog, snapshots, snapfloor)

822

# search for the highest "unrelated" revision

821

# search for the highest "unrelated" revision

823

#

822

#

824

# Adding snapshots used by "unrelated" revision increase the odd we

823

# Adding snapshots used by "unrelated" revision increase the odd we

825

# reuse an independant, yet better snapshot chain.

824

# reuse an independant, yet better snapshot chain.

826

#

825

#

827

# XXX instead of building a set of revisions, we could lazily enumerate

826

# XXX instead of building a set of revisions, we could lazily enumerate

828

# over the chains. That would be more efficient, however we stick to

827

# over the chains. That would be more efficient, however we stick to

829

# simple code for now.

828

# simple code for now.

830

all_revs = set()

829

all_revs = set()

831

for chain in candidate_chains:

830

for chain in candidate_chains:

832

all_revs.update(chain)

831

all_revs.update(chain)

833

other = None

832

other = None

834

for r in revlog.revs(prev, snapfloor):

833

for r in revlog.revs(prev, snapfloor):

835

if r not in all_revs:

834

if r not in all_revs:

836

other = r

835

other = r

837

break

836

break

838

if other is not None:

837

if other is not None:

839

# To avoid unfair competition, we won't use unrelated intermediate

838

# To avoid unfair competition, we won't use unrelated intermediate

840

# snapshot that are deeper than the ones from the parent delta

839

# snapshot that are deeper than the ones from the parent delta

841

# chain.

840

# chain.

842

max_depth = max(parents_snaps.keys())

841

max_depth = max(parents_snaps.keys())

843

chain = deltachain(other)

842

chain = deltachain(other)

844

for idx, s in enumerate(chain):

843

for idx, s in enumerate(chain):

845

if s < snapfloor:

844

if s < snapfloor:

846

continue

845

continue

847

if max_depth < idx:

846

if max_depth < idx:

848

break

847

break

849

if not revlog.issnapshot(s):

848

if not revlog.issnapshot(s):

850

break

849

break

851

parents_snaps[idx].add(s)

850

parents_snaps[idx].add(s)

852

# Test them as possible intermediate snapshot base

851

# Test them as possible intermediate snapshot base

853

# We test them from highest to lowest level. High level one are more

852

# We test them from highest to lowest level. High level one are more

854

# likely to result in small delta

853

# likely to result in small delta

855

floor = None

854

floor = None

856

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

855

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

857

siblings = set()

856

siblings = set()

858

for s in snaps:

857

for s in snaps:

859

siblings.update(snapshots[s])

858

siblings.update(snapshots[s])

860

# Before considering making a new intermediate snapshot, we check

859

# Before considering making a new intermediate snapshot, we check

861

# if an existing snapshot, children of base we consider, would be

860

# if an existing snapshot, children of base we consider, would be

862

# suitable.

861

# suitable.

863

#

862

#

864

# It give a change to reuse a delta chain "unrelated" to the

863

# It give a change to reuse a delta chain "unrelated" to the

865

# current revision instead of starting our own. Without such

864

# current revision instead of starting our own. Without such

866

# re-use, topological branches would keep reopening new chains.

865

# re-use, topological branches would keep reopening new chains.

867

# Creating more and more snapshot as the repository grow.

866

# Creating more and more snapshot as the repository grow.

868

867

869

if floor is not None:

868

if floor is not None:

870

# We only do this for siblings created after the one in our

869

# We only do this for siblings created after the one in our

871

# parent's delta chain. Those created before has less chances

870

# parent's delta chain. Those created before has less chances

872

# to be valid base since our ancestors had to create a new

871

# to be valid base since our ancestors had to create a new

873

# snapshot.

872

# snapshot.

874

siblings = [r for r in siblings if floor < r]

873

siblings = [r for r in siblings if floor < r]

875

yield tuple(sorted(siblings))

874

yield tuple(sorted(siblings))

876

# then test the base from our parent's delta chain.

875

# then test the base from our parent's delta chain.

877

yield tuple(sorted(snaps))

876

yield tuple(sorted(snaps))

878

floor = min(snaps)

877

floor = min(snaps)

879

# No suitable base found in the parent chain, search if any full

878

# No suitable base found in the parent chain, search if any full

880

# snapshots emitted since parent's base would be a suitable base for an

879

# snapshots emitted since parent's base would be a suitable base for an

881

# intermediate snapshot.

880

# intermediate snapshot.

882

#

881

#

883

# It give a chance to reuse a delta chain unrelated to the current

882

# It give a chance to reuse a delta chain unrelated to the current

884

# revisions instead of starting our own. Without such re-use,

883

# revisions instead of starting our own. Without such re-use,

885

# topological branches would keep reopening new full chains. Creating

884

# topological branches would keep reopening new full chains. Creating

886

# more and more snapshot as the repository grow.

885

# more and more snapshot as the repository grow.

887

yield tuple(snapshots[nullrev])

886

yield tuple(snapshots[nullrev])

888

887

889

if not sparse:

888

if not sparse:

890

# other approach failed try against prev to hopefully save us a

889

# other approach failed try against prev to hopefully save us a

891

# fulltext.

890

# fulltext.

892

yield (prev,)

891

yield (prev,)

893

892

894

class deltacomputer(object):

893

class deltacomputer(object):

895

def __init__(self, revlog):

894

def __init__(self, revlog):

896

self.revlog = revlog

895

self.revlog = revlog

897

896

898

def buildtext(self, revinfo, fh):

897

def buildtext(self, revinfo, fh):

899

"""Builds a fulltext version of a revision

898

"""Builds a fulltext version of a revision

900

899

901

revinfo: _revisioninfo instance that contains all needed info

900

revinfo: _revisioninfo instance that contains all needed info

902

fh: file handle to either the .i or the .d revlog file,

901

fh: file handle to either the .i or the .d revlog file,

903

depending on whether it is inlined or not

902

depending on whether it is inlined or not

904

"""

903

"""

905

btext = revinfo.btext

904

btext = revinfo.btext

906

if btext[0] is not None:

905

if btext[0] is not None:

907

return btext[0]

906

return btext[0]

908

907

909

revlog = self.revlog

908

revlog = self.revlog

910

cachedelta = revinfo.cachedelta

909

cachedelta = revinfo.cachedelta

911

baserev = cachedelta[0]

910

baserev = cachedelta[0]

912

delta = cachedelta[1]

911

delta = cachedelta[1]

913

912

914

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

913

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

915

revinfo.p1, revinfo.p2,

914

revinfo.p1, revinfo.p2,

916

revinfo.flags, revinfo.node)

915

revinfo.flags, revinfo.node)

917

return fulltext

916

return fulltext

918

917

919

def _builddeltadiff(self, base, revinfo, fh):

918

def _builddeltadiff(self, base, revinfo, fh):

920

revlog = self.revlog

919

revlog = self.revlog

921

t = self.buildtext(revinfo, fh)

920

t = self.buildtext(revinfo, fh)

922

if revlog.iscensored(base):

921

if revlog.iscensored(base):

923

# deltas based on a censored revision must replace the

922

# deltas based on a censored revision must replace the

924

# full content in one patch, so delta works everywhere

923

# full content in one patch, so delta works everywhere

925

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

924

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

926

delta = header + t

925

delta = header + t

927

else:

926

else:

928

ptext = revlog.rawdata(base, _df=fh)

927

ptext = revlog.rawdata(base, _df=fh)

929

delta = mdiff.textdiff(ptext, t)

928

delta = mdiff.textdiff(ptext, t)

930

929

931

return delta

930

return delta

932

931

933

def _builddeltainfo(self, revinfo, base, fh):

932

def _builddeltainfo(self, revinfo, base, fh):

934

# can we use the cached delta?

933

# can we use the cached delta?

935

revlog = self.revlog

934

revlog = self.revlog

936

chainbase = revlog.chainbase(base)

935

chainbase = revlog.chainbase(base)

937

if revlog._generaldelta:

936

if revlog._generaldelta:

938

deltabase = base

937

deltabase = base

939

else:

938

else:

940

deltabase = chainbase

939

deltabase = chainbase

941

snapshotdepth = None

940

snapshotdepth = None

942

if revlog._sparserevlog and deltabase == nullrev:

941

if revlog._sparserevlog and deltabase == nullrev:

943

snapshotdepth = 0

942

snapshotdepth = 0

944

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

943

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

945

# A delta chain should always be one full snapshot,

944

# A delta chain should always be one full snapshot,

946

# zero or more semi-snapshots, and zero or more deltas

945

# zero or more semi-snapshots, and zero or more deltas

947

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

946

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

948

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

947

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

949

snapshotdepth = len(revlog._deltachain(deltabase)[0])

948

snapshotdepth = len(revlog._deltachain(deltabase)[0])

950

delta = None

949

delta = None

951

if revinfo.cachedelta:

950

if revinfo.cachedelta:

952

cachebase, cachediff = revinfo.cachedelta

951

cachebase, cachediff = revinfo.cachedelta

953

#check if the diff still apply

952

#check if the diff still apply

954

currentbase = cachebase

953

currentbase = cachebase

955

while (currentbase != nullrev

954

while (currentbase != nullrev

956

and currentbase != base

955

and currentbase != base

957

and self.revlog.length(currentbase) == 0):

956

and self.revlog.length(currentbase) == 0):

958

currentbase = self.revlog.deltaparent(currentbase)

957

currentbase = self.revlog.deltaparent(currentbase)

959

if self.revlog._lazydelta and currentbase == base:

958

if self.revlog._lazydelta and currentbase == base:

960

delta = revinfo.cachedelta[1]

959

delta = revinfo.cachedelta[1]

961

if delta is None:

960

if delta is None:

962

delta = self._builddeltadiff(base, revinfo, fh)

961

delta = self._builddeltadiff(base, revinfo, fh)

963

# snapshotdept need to be neither None nor 0 level snapshot

962

# snapshotdept need to be neither None nor 0 level snapshot

964

if revlog.upperboundcomp is not None and snapshotdepth:

963

if revlog.upperboundcomp is not None and snapshotdepth:

965

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

964

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

966

snapshotlimit = revinfo.textlen >> snapshotdepth

965

snapshotlimit = revinfo.textlen >> snapshotdepth

967

if snapshotlimit < lowestrealisticdeltalen:

966

if snapshotlimit < lowestrealisticdeltalen:

968

return None

967

return None

969

if revlog.length(base) < lowestrealisticdeltalen:

968

if revlog.length(base) < lowestrealisticdeltalen:

970

return None

969

return None

971

header, data = revlog.compress(delta)

970

header, data = revlog.compress(delta)

972

deltalen = len(header) + len(data)

971

deltalen = len(header) + len(data)

973

offset = revlog.end(len(revlog) - 1)

972

offset = revlog.end(len(revlog) - 1)

974

dist = deltalen + offset - revlog.start(chainbase)

973

dist = deltalen + offset - revlog.start(chainbase)

975

chainlen, compresseddeltalen = revlog._chaininfo(base)

974

chainlen, compresseddeltalen = revlog._chaininfo(base)

976

chainlen += 1

975

chainlen += 1

977

compresseddeltalen += deltalen

976

compresseddeltalen += deltalen

978

977

979

return _deltainfo(dist, deltalen, (header, data), deltabase,

978

return _deltainfo(dist, deltalen, (header, data), deltabase,

980

chainbase, chainlen, compresseddeltalen,

979

chainbase, chainlen, compresseddeltalen,

981

snapshotdepth)

980

snapshotdepth)

982

981

983

def _fullsnapshotinfo(self, fh, revinfo):

982

def _fullsnapshotinfo(self, fh, revinfo):

984

curr = len(self.revlog)

983

curr = len(self.revlog)

985

rawtext = self.buildtext(revinfo, fh)

984

rawtext = self.buildtext(revinfo, fh)

986

data = self.revlog.compress(rawtext)

985

data = self.revlog.compress(rawtext)

987

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

986

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

988

deltabase = chainbase = curr

987

deltabase = chainbase = curr

989

snapshotdepth = 0

988

snapshotdepth = 0

990

chainlen = 1

989

chainlen = 1

991

990

992

return _deltainfo(dist, deltalen, data, deltabase,

991

return _deltainfo(dist, deltalen, data, deltabase,

993

chainbase, chainlen, compresseddeltalen,

992

chainbase, chainlen, compresseddeltalen,

994

snapshotdepth)

993

snapshotdepth)

995

994

996

def finddeltainfo(self, revinfo, fh):

995

def finddeltainfo(self, revinfo, fh):

997

"""Find an acceptable delta against a candidate revision

996

"""Find an acceptable delta against a candidate revision

998

997

999

revinfo: information about the revision (instance of _revisioninfo)

998

revinfo: information about the revision (instance of _revisioninfo)

1000

fh: file handle to either the .i or the .d revlog file,

999

fh: file handle to either the .i or the .d revlog file,

1001

depending on whether it is inlined or not

1000

depending on whether it is inlined or not

1002

1001

1003

Returns the first acceptable candidate revision, as ordered by

1002

Returns the first acceptable candidate revision, as ordered by

1004

_candidategroups

1003

_candidategroups

1005

1004

1006

If no suitable deltabase is found, we return delta info for a full

1005

If no suitable deltabase is found, we return delta info for a full

1007

snapshot.

1006

snapshot.

1008

"""

1007

"""

1009

if not revinfo.textlen:

1008

if not revinfo.textlen:

1010

return self._fullsnapshotinfo(fh, revinfo)

1009

return self._fullsnapshotinfo(fh, revinfo)

1011

1010

1012

# no delta for flag processor revision (see "candelta" for why)

1011

# no delta for flag processor revision (see "candelta" for why)

1013

# not calling candelta since only one revision needs test, also to

1012

# not calling candelta since only one revision needs test, also to

1014

# avoid overhead fetching flags again.

1013

# avoid overhead fetching flags again.

1015

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1014

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1016

return self._fullsnapshotinfo(fh, revinfo)

1015

return self._fullsnapshotinfo(fh, revinfo)

1017

1016

1018

cachedelta = revinfo.cachedelta

1017

cachedelta = revinfo.cachedelta

1019

p1 = revinfo.p1

1018

p1 = revinfo.p1

1020

p2 = revinfo.p2

1019

p2 = revinfo.p2

1021

revlog = self.revlog

1020

revlog = self.revlog

1022

1021

1023

deltainfo = None

1022

deltainfo = None

1024

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

1023

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

1025

groups = _candidategroups(self.revlog, revinfo.textlen,

1024

groups = _candidategroups(self.revlog, revinfo.textlen,

1026

p1r, p2r, cachedelta)

1025

p1r, p2r, cachedelta)

1027

candidaterevs = next(groups)

1026

candidaterevs = next(groups)

1028

while candidaterevs is not None:

1027

while candidaterevs is not None:

1029

nominateddeltas = []

1028

nominateddeltas = []

1030

if deltainfo is not None:

1029

if deltainfo is not None:

1031

# if we already found a good delta,

1030

# if we already found a good delta,

1032

# challenge it against refined candidates

1031

# challenge it against refined candidates

1033

nominateddeltas.append(deltainfo)

1032

nominateddeltas.append(deltainfo)

1034

for candidaterev in candidaterevs:

1033

for candidaterev in candidaterevs:

1035

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

1034

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

1036

if candidatedelta is not None:

1035

if candidatedelta is not None:

1037

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

1036

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

1038

nominateddeltas.append(candidatedelta)

1037

nominateddeltas.append(candidatedelta)

1039

if nominateddeltas:

1038

if nominateddeltas:

1040

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1039

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1041

if deltainfo is not None:

1040

if deltainfo is not None:

1042

candidaterevs = groups.send(deltainfo.base)

1041

candidaterevs = groups.send(deltainfo.base)

1043

else:

1042

else:

1044

candidaterevs = next(groups)

1043

candidaterevs = next(groups)

1045

1044

1046

if deltainfo is None:

1045

if deltainfo is None:

1047

deltainfo = self._fullsnapshotinfo(fh, revinfo)

1046

deltainfo = self._fullsnapshotinfo(fh, revinfo)

1048

return deltainfo

1047

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
             import collections
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
                 util,
             )
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                     self._snapshot = set(snapshot)
                     self.index = None
                 def start(self, rev):
                     if rev == nullrev:
                         return 0
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     if rev == nullrev:
                         return 0
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
                 def issnapshot(self, rev):
                     if rev == nullrev:
                         return True
                     return rev in self._snapshot
             def slicechunk(revlog, revs, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.
                 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
                 For consistency with other slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.
                 If individual revisions chunk are larger than this limit, they will still
                 be raised individually.
                 >>> data = [
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ]
                 >>> revlog = _testrevlog(data, snapshot=range(16))
                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 Slicing with a maximum chunk size
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 Slicing involving nullrev
                 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
                 [[-1, 0], [11], [13, 15]]
                 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
                 [[-1], [13], [15]]
                 """
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)
                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when reading,
                 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
                 if densityslicing is None:
                     densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
                 for chunk in densityslicing(revs,
                                             revlog._srdensitythreshold,
                                             revlog._srmingapsize):
                     for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                         yield subchunk
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> data = [
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ]
                 == All snapshots cases ==
                 >>> revlog = _testrevlog(data, snapshot=range(9))
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == No Snapshot cases ==
                 >>> revlog = _testrevlog(data)
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1], [2, 3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [4, 5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == mixed case ==
                 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
                 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
                 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 startdata = revlog.start(revs[0])
                 enddata = revlog.end(revs[-1])
                 fullspan = enddata - startdata
                 if targetsize is None or fullspan <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 endrevidx = 1
                 iterrevs = enumerate(revs)
                 next(iterrevs) # skip first rev.
                 # first step: get snapshots out of the way
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     snapshot = revlog.issnapshot(r)
                     if span <= targetsize and snapshot:
                         endrevidx = idx + 1
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx + 1
                     if not snapshot:
                         break
                 # for the others, we use binary slicing to quickly converge toward valid
                 # chunks (otherwise, we might end up looking for start/end of many
                 # revisions). This logic is not looking for the perfect slicing point, it
                 # focuses on quickly converging toward valid chunks.
                 nbitem = len(revs)
                 while (enddata - startdata) > targetsize:
                     endrevidx = nbitem
                     if nbitem - startrevidx <= 1:
                         break # protect against individual chunk larger than limit
                     localenddata = revlog.end(revs[endrevidx - 1])
                     span = localenddata - startdata
                     while span > targetsize:
                         if endrevidx - startrevidx <= 1:
                             break # protect against individual chunk larger than limit
                         endrevidx -= (endrevidx - startrevidx) // 2
                         localenddata = revlog.end(revs[endrevidx - 1])
                         span = localenddata - startdata
                     chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                     if chunk:
                         yield chunk
                     startrevidx = endrevidx
                     startdata = revlog.start(revs[startrevidx])
                 chunk = _trimchunk(revlog, revs, startrevidx)
                 if chunk:
                     yield chunk
             def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                                      mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 deltachainspan = segmentspan(revlog, revs)
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 chainpayload = sum(length(r) for r in revs)
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gaps = []
                 prevend = None
                 for i, rev in enumerate(revs):
                     revstart = start(rev)
                     revlen = length(rev)
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             gaps.append((gapsize, i))
                     prevend = revstart + revlen
                 # sort the gaps to pop them from largest to small
                 gaps.sort()
                 # Collect the indices of the largest holes until the density is acceptable
                 selected = []
                 while gaps and density < targetdensity:
                     gapsize, gapidx = gaps.pop()
                     selected.append(gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= gapsize
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 selected.sort()
                 # Cut the revs at collected indices
                 previdx = 0
                 for idx in selected:
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (endidx > 1
                             and endidx > startidx
                             and length(revs[endidx - 1]) == 0):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs):
                 """Get the byte span of a segment of revisions
                 revs is a sorted array of revision numbers
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])
                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 >>> segmentspan(revlog, [0, 4])
                 >>> segmentspan(revlog, [3, 4])
                 >>> segmentspan(revlog, [1, 2, 3,])
                 >>> segmentspan(revlog, [1, 3])
                 """
                 if not revs:
                     return 0
                 end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata"""
                 # special case deltas which replace entire base; no need to decode
                 # base revision. this neatly avoids censored bases, which throw when
                 # they're decoded.
                 hlen = struct.calcsize(">lll")
                 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                            len(delta) - hlen):
                     fulltext = delta[hlen:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     fulltext = mdiff.patch(basetext, delta)
                 try:
-                    res = revlog._processflags(fulltext, flags, 'read', raw=True)
+                    validatehash = revlog._processflagsraw(fulltext, flags)
-                    fulltext, validatehash = res
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise error.StorageError(_('node %s is not censored') %
                                                  expectednode)
                 except error.CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 distance = attr.ib()
                 deltalen = attr.ib()
                 data = attr.ib()
                 base = attr.ib()
                 chainbase = attr.ib()
                 chainlen = attr.ib()
                 compresseddeltalen = attr.ib()
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant."""
                 if deltainfo is None:
                     return False
                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.
                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = deltainfo.distance # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 #
                 #   In the sparse-revlog case, we rely on the associated "sparse reading"
                 #   to avoid issue related to the span of data. In theory, it would be
                 #   possible to build pathological revlog where delta pattern would lead
                 #   to too many reads. However, they do not happen in practice at all. So
                 #   we skip the span check entirely.
                 if not revlog._sparserevlog and maxdist < deltainfo.distance:
                     return False
                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False
                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False
                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (revlog._maxchainlen
                         and revlog._maxchainlen < deltainfo.chainlen):
                     return False
                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (deltainfo.snapshotdepth is not None and
                         (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
                     return False
                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (deltainfo.snapshotdepth
                         and revlog.length(deltainfo.base) < deltainfo.deltalen):
                     return False
                 return True
             # If a revision's full text is that much bigger than a base candidate full
             # text's, it is very unlikely that it will produce a valid delta. We no longer
             # consider these candidates.
             LIMIT_BASE2TEXT = 500
             def _candidategroups(revlog, textlen, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _raw_candidate_groups for details about the group order.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     yield None
                     return
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 sparse = revlog._sparserevlog
                 good = None
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
                 tested = {nullrev}
                 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
                 while True:
                     temptative = candidates.send(good)
                     if temptative is None:
                         break
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while (revlog._generaldelta
                                and not (rev == nullrev
                                         or rev in tested
                                         or deltalength(rev))):
                             tested.add(rev)
                             rev = deltaparent(rev)
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         tested.add(rev)
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             continue
                         if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             continue
                         # If we reach here, we are about to build and test a delta.
                         # The delta building process will compute the chaininfo in all
                         # case, since that computation is cached, it is fine to access it
                         # here too.
                         chainlen, chainsize = revlog._chaininfo(rev)
                         # if chain will be too long, skip base
                         if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                             continue
                         # if chain already have too much data, skip base
                         if deltas_limit < chainsize:
                             continue
                         if sparse and revlog.upperboundcomp is not None:
                             maxcomp = revlog.upperboundcomp
                             basenotsnap = (p1, p2, nullrev)
                             if rev not in basenotsnap and revlog.issnapshot(rev):
                                 snapshotdepth = revlog.snapshotdepth(rev)
                                 # If text is significantly larger than the base, we can
                                 # expect the resulting delta to be proportional to the size
                                 # difference
                                 revsize = revlog.rawsize(rev)
                                 rawsizedistance = max(textlen - revsize, 0)
                                 # use an estimate of the compression upper bound.
                                 lowestrealisticdeltalen = rawsizedistance // maxcomp
                                 # check the absolute constraint on the delta size
                                 snapshotlimit = textlen >> snapshotdepth
                                 if snapshotlimit < lowestrealisticdeltalen:
                                     # delta lower bound is larger than accepted upper bound
                                     continue
                                 # check the relative constraint on the delta size
                                 revlength = revlog.length(rev)
                                 if revlength < lowestrealisticdeltalen:
                                     # delta probable lower bound is larger than target base
                                     continue
                         group.append(rev)
                     if group:
                         # XXX: in the sparse revlog case, group can become large,
                         #      impacting performances. Some bounding or slicing mecanism
                         #      would help to reduce this impact.
                         good = yield tuple(group)
                 yield None
             def _findsnapshots(revlog, cache, start_rev):
                 """find snapshot from start_rev to tip"""
                 if util.safehasattr(revlog.index, 'findsnapshots'):
                     revlog.index.findsnapshots(cache, start_rev)
                 else:
                     deltaparent = revlog.deltaparent
                     issnapshot = revlog.issnapshot
                     for rev in revlog.revs(start_rev):
                         if issnapshot(rev):
                             cache[deltaparent(rev)].append(rev)
             def _refinedgroups(revlog, p1, p2, cachedelta):
                 good = None
                 # First we try to reuse a the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     good = yield (cachedelta[0],)
                     if good is not None:
                         yield None
                         return
                 snapshots = collections.defaultdict(list)
                 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
                     good = yield candidates
                     if good is not None:
                         break
                 # If sparse revlog is enabled, we can try to refine the available deltas
                 if not revlog._sparserevlog:
                     yield None
                     return
                 # if we have a refinable value, try to refine it
                 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
                     # refine snapshot down
                     previous = None
                     while previous != good:
                         previous = good
                         base = revlog.deltaparent(good)
                         if base == nullrev:
                             break
                         good = yield (base,)
                     # refine snapshot up
                     if not snapshots:
                         _findsnapshots(revlog, snapshots, good + 1)
                     previous = None
                     while good != previous:
                         previous = good
                         children = tuple(sorted(c for c in snapshots[good]))
                         good = yield children
                 # we have found nothing
                 yield None
             def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 gdelta = revlog._generaldelta
                 # gate sparse behind general-delta because of issue6056
                 sparse = gdelta and revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 if sparse and parents:
                     if snapshots is None:
                         # map: base-rev: snapshot-rev
                         snapshots = collections.defaultdict(list)
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     #
                     # search for snapshot in parents delta chain
                     # map: snapshot-level: snapshot-rev
                     parents_snaps = collections.defaultdict(set)
                     candidate_chains = [deltachain(p) for p in parents]
                     for chain in candidate_chains:
                         for idx, s in enumerate(chain):
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     snapfloor = min(parents_snaps[0]) + 1
                     _findsnapshots(revlog, snapshots, snapfloor)
                     # search for the highest "unrelated" revision
                     #
                     # Adding snapshots used by "unrelated" revision increase the odd we
                     # reuse an independant, yet better snapshot chain.
                     #
                     # XXX instead of building a set of revisions, we could lazily enumerate
                     # over the chains. That would be more efficient, however we stick to
                     # simple code for now.
                     all_revs = set()
                     for chain in candidate_chains:
                         all_revs.update(chain)
                     other = None
                     for r in revlog.revs(prev, snapfloor):
                         if r not in all_revs:
                             other = r
                             break
                     if other is not None:
                         # To avoid unfair competition, we won't use unrelated intermediate
                         # snapshot that are deeper than the ones from the parent delta
                         # chain.
                         max_depth = max(parents_snaps.keys())
                         chain = deltachain(other)
                         for idx, s in enumerate(chain):
                             if s < snapfloor:
                                 continue
                             if max_depth < idx:
                                 break
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     # Test them as possible intermediate snapshot base
                     # We test them from highest to lowest level. High level one are more
                     # likely to result in small delta
                     floor = None
                     for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                         siblings = set()
                         for s in snaps:
                             siblings.update(snapshots[s])
                         # Before considering making a new intermediate snapshot, we check
                         # if an existing snapshot, children of base we consider, would be
                         # suitable.
                         #
                         # It give a change to reuse a delta chain "unrelated" to the
                         # current revision instead of starting our own. Without such
                         # re-use, topological branches would keep reopening new chains.
                         # Creating more and more snapshot as the repository grow.
                         if floor is not None:
                             # We only do this for siblings created after the one in our
                             # parent's delta chain. Those created before has less chances
                             # to be valid base since our ancestors had to create a new
                             # snapshot.
                             siblings = [r for r in siblings if floor < r]
                         yield tuple(sorted(siblings))
                         # then test the base from our parent's delta chain.
                         yield tuple(sorted(snaps))
                         floor = min(snaps)
                     # No suitable base found in the parent chain, search if any full
                     # snapshots emitted since parent's base would be a suitable base for an
                     # intermediate snapshot.
                     #
                     # It give a chance to reuse a delta chain unrelated to the current
                     # revisions instead of starting our own. Without such re-use,
                     # topological branches would keep reopening new full chains. Creating
                     # more and more snapshot as the repository grow.
                     yield tuple(snapshots[nullrev])
                 if not sparse:
                     # other approach failed try against prev to hopefully save us a
                     # fulltext.
                     yield (prev,)
             class deltacomputer(object):
                 def __init__(self, revlog):
                     self.revlog = revlog
                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision
                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return fulltext
                 def _builddeltadiff(self, base, revinfo, fh):
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.rawdata(base, _df=fh)
                         delta = mdiff.textdiff(ptext, t)
                     return delta
                 def _builddeltainfo(self, revinfo, base, fh):
                     # can we use the cached delta?
                     revlog = self.revlog
                     chainbase = revlog.chainbase(base)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     snapshotdepth = None
                     if revlog._sparserevlog and deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     delta = None
                     if revinfo.cachedelta:
                         cachebase, cachediff = revinfo.cachedelta
                         #check if the diff still apply
                         currentbase = cachebase
                         while (currentbase != nullrev
                                 and currentbase != base
                                 and self.revlog.length(currentbase) == 0):
                             currentbase = self.revlog.deltaparent(currentbase)
                         if self.revlog._lazydelta and currentbase == base:
                             delta = revinfo.cachedelta[1]
                     if delta is None:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     # snapshotdept need to be neither None nor 0 level snapshot
                     if revlog.upperboundcomp is not None and snapshotdepth:
                         lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
                         snapshotlimit = revinfo.textlen >> snapshotdepth
                         if snapshotlimit < lowestrealisticdeltalen:
                             return None
                         if revlog.length(base) < lowestrealisticdeltalen:
                             return None
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def _fullsnapshotinfo(self, fh, revinfo):
                     curr = len(self.revlog)
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(dist, deltalen, data, deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)
                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     """
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
                     deltainfo = None
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     groups = _candidategroups(self.revlog, revinfo.textlen,
                                                          p1r, p2r, cachedelta)
                     candidaterevs = next(groups)
                     while candidaterevs is not None:
                         nominateddeltas = []
                         if deltainfo is not None:
                             # if we already found a good delta,
                             # challenge it against refined candidates
                             nominateddeltas.append(deltainfo)
                         for candidaterev in candidaterevs:
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if candidatedelta is not None:
                                 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                     nominateddeltas.append(candidatedelta)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                         if deltainfo is not None:
                             candidaterevs = groups.send(deltainfo.base)
                         else:
                             candidaterevs = next(groups)
                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo