upstream/mercurial-mirror Commit - r42668:9b5fbe5e

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import collections

12

import collections

13

import struct

13

import struct

14

15

# import stuff from node for others to import from revlog

15

# import stuff from node for others to import from revlog

16

from ..node import (

16

from ..node import (

17

nullrev,

17

nullrev,

18

)

18

)

19

from ..i18n import _

19

from ..i18n import _

20

21

from .constants import (

21

from .constants import (

22

REVIDX_ISCENSORED,

22

REVIDX_ISCENSORED,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

)

24

)

25

26

from ..thirdparty import (

26

from ..thirdparty import (

27

attr,

27

attr,

28

)

28

)

29

30

from .. import (

30

from .. import (

31

error,

31

error,

32

mdiff,

32

mdiff,

33

util,

33

util,

34

)

34

)

35

36

# maximum <delta-chain-data>/<revision-text-length> ratio

36

# maximum <delta-chain-data>/<revision-text-length> ratio

37

# Value of the ratio limit named in the comment above.
# NOTE(review): not referenced in the portion of the file visible here;
# presumably consumed by delta-validity checks further down -- confirm.
LIMIT_DELTA2TEXT = 2

37

LIMIT_DELTA2TEXT = 2

38

39

class _testrevlog(object):

39

class _testrevlog(object):

40

"""minimalist fake revlog to use in doctests"""

40

"""minimalist fake revlog to use in doctests"""

41

42

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

42

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

43

"""data is an list of revision payload boundaries"""

43

"""data is an list of revision payload boundaries"""

44

self._data = data

44

self._data = data

45

self._srdensitythreshold = density

45

self._srdensitythreshold = density

46

self._srmingapsize = mingap

46

self._srmingapsize = mingap

47

self._snapshot = set(snapshot)

47

self._snapshot = set(snapshot)

48

self.index = None

48

self.index = None

49

50

def start(self, rev):

50

def start(self, rev):

51

if rev == nullrev:

51

if rev == nullrev:

52

return 0

52

return 0

53

if rev == 0:

53

if rev == 0:

54

return 0

54

return 0

55

return self._data[rev - 1]

55

return self._data[rev - 1]

56

57

def end(self, rev):

57

def end(self, rev):

58

if rev == nullrev:

58

if rev == nullrev:

59

return 0

59

return 0

60

return self._data[rev]

60

return self._data[rev]

61

62

def length(self, rev):

62

def length(self, rev):

63

return self.end(rev) - self.start(rev)

63

return self.end(rev) - self.start(rev)

64

65

def __len__(self):

65

def __len__(self):

66

return len(self._data)

66

return len(self._data)

67

68

def issnapshot(self, rev):

68

def issnapshot(self, rev):

69

if rev == nullrev:

69

if rev == nullrev:

70

return True

70

return True

71

return rev in self._snapshot

71

return rev in self._snapshot

72

73

def slicechunk(revlog, revs, targetsize=None):
    """Split ``revs`` into groups that are each worth reading in one go.

    ``revs`` must be sorted. Slicing happens in two stages:

    * density slicing: the revisions are cut until the payload/span ratio
      of each group reaches `revlog._srdensitythreshold`; gaps that are not
      larger than `revlog._srmingapsize` are never used as a cut point.
      The index's own ``slicechunktodensity`` is used when it has one,
      otherwise the Python implementation from this module.
    * size slicing: when `targetsize` is given, each group is cut again so
      that no yielded group spans more than `targetsize` bytes. The limit
      is never taken lower than `revlog._srmingapsize`.

    A single revision larger than the limit is still yielded, on its own.

    NOTE(review): the original inline comment said `targetsize` should not
    be passed when evaluating delta candidates because it exists to keep
    reads within specification; that comment was truncated -- confirm.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    # clamp the size limit so it never drops below the minimal gap size
    if targetsize is not None and targetsize < revlog._srmingapsize:
        targetsize = revlog._srmingapsize

    # prefer the slicer offered by the index, when there is one
    slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if slicer is None:
        def slicer(chunkrevs, density, gapsize):
            return _slicechunktodensity(revlog, chunkrevs, density, gapsize)

    densegroups = slicer(revs,
                         revlog._srdensitythreshold,
                         revlog._srmingapsize)
    for group in densegroups:
        for piece in _slicechunktosize(revlog, group, targetsize):
            yield piece

145

146

def _slicechunktosize(revlog, revs, targetsize=None):

146

def _slicechunktosize(revlog, revs, targetsize=None):

147

"""slice revs to match the target size

147

"""slice revs to match the target size

148

149

This is intended to be used on chunk that density slicing selected by that

149

This is intended to be used on chunk that density slicing selected by that

150

are still too large compared to the read garantee of revlog. This might

150

are still too large compared to the read garantee of revlog. This might

151

happens when "minimal gap size" interrupted the slicing or when chain are

151

happens when "minimal gap size" interrupted the slicing or when chain are

152

built in a way that create large blocks next to each other.

152

built in a way that create large blocks next to each other.

153

154

>>> data = [

154

>>> data = [

155

... 3, #0 (3)

155

... 3, #0 (3)

156

... 5, #1 (2)

156

... 5, #1 (2)

157

... 6, #2 (1)

157

... 6, #2 (1)

158

... 8, #3 (2)

158

... 8, #3 (2)

159

... 8, #4 (empty)

159

... 8, #4 (empty)

160

... 11, #5 (3)

160

... 11, #5 (3)

161

... 12, #6 (1)

161

... 12, #6 (1)

162

... 13, #7 (1)

162

... 13, #7 (1)

163

... 14, #8 (1)

163

... 14, #8 (1)

164

... ]

164

... ]

165

166

== All snapshots cases ==

166

== All snapshots cases ==

167

>>> revlog = _testrevlog(data, snapshot=range(9))

167

>>> revlog = _testrevlog(data, snapshot=range(9))

168

169

Cases where chunk is already small enough

169

Cases where chunk is already small enough

170

>>> list(_slicechunktosize(revlog, [0], 3))

170

>>> list(_slicechunktosize(revlog, [0], 3))

171

[[0]]

171

[[0]]

172

>>> list(_slicechunktosize(revlog, [6, 7], 3))

172

>>> list(_slicechunktosize(revlog, [6, 7], 3))

173

[[6, 7]]

173

[[6, 7]]

174

>>> list(_slicechunktosize(revlog, [0], None))

174

>>> list(_slicechunktosize(revlog, [0], None))

175

[[0]]

175

[[0]]

176

>>> list(_slicechunktosize(revlog, [6, 7], None))

176

>>> list(_slicechunktosize(revlog, [6, 7], None))

177

[[6, 7]]

177

[[6, 7]]

178

179

cases where we need actual slicing

179

cases where we need actual slicing

180

>>> list(_slicechunktosize(revlog, [0, 1], 3))

180

>>> list(_slicechunktosize(revlog, [0, 1], 3))

181

[[0], [1]]

181

[[0], [1]]

182

>>> list(_slicechunktosize(revlog, [1, 3], 3))

182

>>> list(_slicechunktosize(revlog, [1, 3], 3))

183

[[1], [3]]

183

[[1], [3]]

184

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

184

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

185

[[1, 2], [3]]

185

[[1, 2], [3]]

186

>>> list(_slicechunktosize(revlog, [3, 5], 3))

186

>>> list(_slicechunktosize(revlog, [3, 5], 3))

187

[[3], [5]]

187

[[3], [5]]

188

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

188

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

189

[[3], [5]]

189

[[3], [5]]

190

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

190

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

191

[[5], [6, 7, 8]]

191

[[5], [6, 7, 8]]

192

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

192

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

193

[[0], [1, 2], [3], [5], [6, 7, 8]]

193

[[0], [1, 2], [3], [5], [6, 7, 8]]

194

195

Case with too large individual chunk (must return valid chunk)

195

Case with too large individual chunk (must return valid chunk)

196

>>> list(_slicechunktosize(revlog, [0, 1], 2))

196

>>> list(_slicechunktosize(revlog, [0, 1], 2))

197

[[0], [1]]

197

[[0], [1]]

198

>>> list(_slicechunktosize(revlog, [1, 3], 1))

198

>>> list(_slicechunktosize(revlog, [1, 3], 1))

199

[[1], [3]]

199

[[1], [3]]

200

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

200

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

201

[[3], [5]]

201

[[3], [5]]

202

203

== No Snapshot cases ==

203

== No Snapshot cases ==

204

>>> revlog = _testrevlog(data)

204

>>> revlog = _testrevlog(data)

205

206

Cases where chunk is already small enough

206

Cases where chunk is already small enough

207

>>> list(_slicechunktosize(revlog, [0], 3))

207

>>> list(_slicechunktosize(revlog, [0], 3))

208

[[0]]

208

[[0]]

209

>>> list(_slicechunktosize(revlog, [6, 7], 3))

209

>>> list(_slicechunktosize(revlog, [6, 7], 3))

210

[[6, 7]]

210

[[6, 7]]

211

>>> list(_slicechunktosize(revlog, [0], None))

211

>>> list(_slicechunktosize(revlog, [0], None))

212

[[0]]

212

[[0]]

213

>>> list(_slicechunktosize(revlog, [6, 7], None))

213

>>> list(_slicechunktosize(revlog, [6, 7], None))

214

[[6, 7]]

214

[[6, 7]]

215

216

cases where we need actual slicing

216

cases where we need actual slicing

217

>>> list(_slicechunktosize(revlog, [0, 1], 3))

217

>>> list(_slicechunktosize(revlog, [0, 1], 3))

218

[[0], [1]]

218

[[0], [1]]

219

>>> list(_slicechunktosize(revlog, [1, 3], 3))

219

>>> list(_slicechunktosize(revlog, [1, 3], 3))

220

[[1], [3]]

220

[[1], [3]]

221

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

221

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

222

[[1], [2, 3]]

222

[[1], [2, 3]]

223

>>> list(_slicechunktosize(revlog, [3, 5], 3))

223

>>> list(_slicechunktosize(revlog, [3, 5], 3))

224

[[3], [5]]

224

[[3], [5]]

225

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

225

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

226

[[3], [4, 5]]

226

[[3], [4, 5]]

227

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

227

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

228

[[5], [6, 7, 8]]

228

[[5], [6, 7, 8]]

229

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

229

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

230

[[0], [1, 2], [3], [5], [6, 7, 8]]

230

[[0], [1, 2], [3], [5], [6, 7, 8]]

231

232

Case with too large individual chunk (must return valid chunk)

232

Case with too large individual chunk (must return valid chunk)

233

>>> list(_slicechunktosize(revlog, [0, 1], 2))

233

>>> list(_slicechunktosize(revlog, [0, 1], 2))

234

[[0], [1]]

234

[[0], [1]]

235

>>> list(_slicechunktosize(revlog, [1, 3], 1))

235

>>> list(_slicechunktosize(revlog, [1, 3], 1))

236

[[1], [3]]

236

[[1], [3]]

237

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

237

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

238

[[3], [5]]

238

[[3], [5]]

239

240

== mixed case ==

240

== mixed case ==

241

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

241

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

242

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

242

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

243

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

243

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

244

"""

244

"""

245

assert targetsize is None or 0 <= targetsize

245

assert targetsize is None or 0 <= targetsize

246

startdata = revlog.start(revs[0])

246

startdata = revlog.start(revs[0])

247

enddata = revlog.end(revs[-1])

247

enddata = revlog.end(revs[-1])

248

fullspan = enddata - startdata

248

fullspan = enddata - startdata

249

if targetsize is None or fullspan <= targetsize:

249

if targetsize is None or fullspan <= targetsize:

250

yield revs

250

yield revs

251

return

251

return

252

253

startrevidx = 0

253

startrevidx = 0

254

endrevidx = 1

254

endrevidx = 1

255

iterrevs = enumerate(revs)

255

iterrevs = enumerate(revs)

256

next(iterrevs) # skip first rev.

256

next(iterrevs) # skip first rev.

257

# first step: get snapshots out of the way

257

# first step: get snapshots out of the way

258

for idx, r in iterrevs:

258

for idx, r in iterrevs:

259

span = revlog.end(r) - startdata

259

span = revlog.end(r) - startdata

260

snapshot = revlog.issnapshot(r)

260

snapshot = revlog.issnapshot(r)

261

if span <= targetsize and snapshot:

261

if span <= targetsize and snapshot:

262

endrevidx = idx + 1

262

endrevidx = idx + 1

263

else:

263

else:

264

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

264

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

265

if chunk:

265

if chunk:

266

yield chunk

266

yield chunk

267

startrevidx = idx

267

startrevidx = idx

268

startdata = revlog.start(r)

268

startdata = revlog.start(r)

269

endrevidx = idx + 1

269

endrevidx = idx + 1

270

if not snapshot:

270

if not snapshot:

271

break

271

break

272

273

# for the others, we use binary slicing to quickly converge toward valid

273

# for the others, we use binary slicing to quickly converge toward valid

274

# chunks (otherwise, we might end up looking for start/end of many

274

# chunks (otherwise, we might end up looking for start/end of many

275

# revisions). This logic is not looking for the perfect slicing point, it

275

# revisions). This logic is not looking for the perfect slicing point, it

276

# focuses on quickly converging toward valid chunks.

276

# focuses on quickly converging toward valid chunks.

277

nbitem = len(revs)

277

nbitem = len(revs)

278

while (enddata - startdata) > targetsize:

278

while (enddata - startdata) > targetsize:

279

endrevidx = nbitem

279

endrevidx = nbitem

280

if nbitem - startrevidx <= 1:

280

if nbitem - startrevidx <= 1:

281

break # protect against individual chunk larger than limit

281

break # protect against individual chunk larger than limit

282

localenddata = revlog.end(revs[endrevidx - 1])

282

localenddata = revlog.end(revs[endrevidx - 1])

283

span = localenddata - startdata

283

span = localenddata - startdata

284

while span > targetsize:

284

while span > targetsize:

285

if endrevidx - startrevidx <= 1:

285

if endrevidx - startrevidx <= 1:

286

break # protect against individual chunk larger than limit

286

break # protect against individual chunk larger than limit

287

endrevidx -= (endrevidx - startrevidx) // 2

287

endrevidx -= (endrevidx - startrevidx) // 2

288

localenddata = revlog.end(revs[endrevidx - 1])

288

localenddata = revlog.end(revs[endrevidx - 1])

289

span = localenddata - startdata

289

span = localenddata - startdata

290

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

290

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

291

if chunk:

291

if chunk:

292

yield chunk

292

yield chunk

293

startrevidx = endrevidx

293

startrevidx = endrevidx

294

startdata = revlog.start(revs[startrevidx])

294

startdata = revlog.start(revs[startrevidx])

295

296

chunk = _trimchunk(revlog, revs, startrevidx)

296

chunk = _trimchunk(revlog, revs, startrevidx)

297

if chunk:

297

if chunk:

298

yield chunk

298

yield chunk

299

300

def _slicechunktodensity(revlog, revs, targetdensity=0.5,

300

def _slicechunktodensity(revlog, revs, targetdensity=0.5,

301

mingapsize=0):

301

mingapsize=0):

302

"""slice revs to reduce the amount of unrelated data to be read from disk.

302

"""slice revs to reduce the amount of unrelated data to be read from disk.

303

304

``revs`` is sliced into groups that should be read in one time.

304

``revs`` is sliced into groups that should be read in one time.

305

Assume that revs are sorted.

305

Assume that revs are sorted.

306

307

The initial chunk is sliced until the overall density (payload/chunks-span

307

The initial chunk is sliced until the overall density (payload/chunks-span

308

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

308

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

309

skipped.

309

skipped.

310

311

>>> revlog = _testrevlog([

311

>>> revlog = _testrevlog([

312

... 5, #00 (5)

312

... 5, #00 (5)

313

... 10, #01 (5)

313

... 10, #01 (5)

314

... 12, #02 (2)

314

... 12, #02 (2)

315

... 12, #03 (empty)

315

... 12, #03 (empty)

316

... 27, #04 (15)

316

... 27, #04 (15)

317

... 31, #05 (4)

317

... 31, #05 (4)

318

... 31, #06 (empty)

318

... 31, #06 (empty)

319

... 42, #07 (11)

319

... 42, #07 (11)

320

... 47, #08 (5)

320

... 47, #08 (5)

321

... 47, #09 (empty)

321

... 47, #09 (empty)

322

... 48, #10 (1)

322

... 48, #10 (1)

323

... 51, #11 (3)

323

... 51, #11 (3)

324

... 74, #12 (23)

324

... 74, #12 (23)

325

... 85, #13 (11)

325

... 85, #13 (11)

326

... 86, #14 (1)

326

... 86, #14 (1)

327

... 91, #15 (5)

327

... 91, #15 (5)

328

... ])

328

... ])

329

330

>>> list(_slicechunktodensity(revlog, list(range(16))))

330

>>> list(_slicechunktodensity(revlog, list(range(16))))

331

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

331

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

332

>>> list(_slicechunktodensity(revlog, [0, 15]))

332

>>> list(_slicechunktodensity(revlog, [0, 15]))

333

[[0], [15]]

333

[[0], [15]]

334

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

334

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

335

[[0], [11], [15]]

335

[[0], [11], [15]]

336

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

336

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

337

[[0], [11, 13, 15]]

337

[[0], [11, 13, 15]]

338

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

338

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

339

[[1, 2], [5, 8, 10, 11], [14]]

339

[[1, 2], [5, 8, 10, 11], [14]]

340

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

340

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

341

... mingapsize=20))

341

... mingapsize=20))

342

[[1, 2, 3, 5, 8, 10, 11], [14]]

342

[[1, 2, 3, 5, 8, 10, 11], [14]]

343

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

343

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

344

... targetdensity=0.95))

344

... targetdensity=0.95))

345

[[1, 2], [5], [8, 10, 11], [14]]

345

[[1, 2], [5], [8, 10, 11], [14]]

346

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

346

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

347

... targetdensity=0.95, mingapsize=12))

347

... targetdensity=0.95, mingapsize=12))

348

[[1, 2], [5, 8, 10, 11], [14]]

348

[[1, 2], [5, 8, 10, 11], [14]]

349

"""

349

"""

350

start = revlog.start

350

start = revlog.start

351

length = revlog.length

351

length = revlog.length

352

353

if len(revs) <= 1:

353

if len(revs) <= 1:

354

yield revs

354

yield revs

355

return

355

return

356

357

deltachainspan = segmentspan(revlog, revs)

357

deltachainspan = segmentspan(revlog, revs)

358

359

if deltachainspan < mingapsize:

359

if deltachainspan < mingapsize:

360

yield revs

360

yield revs

361

return

361

return

362

363

readdata = deltachainspan

363

readdata = deltachainspan

364

chainpayload = sum(length(r) for r in revs)

364

chainpayload = sum(length(r) for r in revs)

365

366

if deltachainspan:

366

if deltachainspan:

367

density = chainpayload / float(deltachainspan)

367

density = chainpayload / float(deltachainspan)

368

else:

368

else:

369

density = 1.0

369

density = 1.0

370

371

if density >= targetdensity:

371

if density >= targetdensity:

372

yield revs

372

yield revs

373

return

373

return

374

375

# Store the gaps in a heap to have them sorted by decreasing size

375

# Store the gaps in a heap to have them sorted by decreasing size

376

gaps = []

376

gaps = []

377

prevend = None

377

prevend = None

378

for i, rev in enumerate(revs):

378

for i, rev in enumerate(revs):

379

revstart = start(rev)

379

revstart = start(rev)

380

revlen = length(rev)

380

revlen = length(rev)

381

382

# Skip empty revisions to form larger holes

382

# Skip empty revisions to form larger holes

383

if revlen == 0:

383

if revlen == 0:

384

continue

384

continue

385

386

if prevend is not None:

386

if prevend is not None:

387

gapsize = revstart - prevend

387

gapsize = revstart - prevend

388

# only consider holes that are large enough

388

# only consider holes that are large enough

389

if gapsize > mingapsize:

389

if gapsize > mingapsize:

390

gaps.append((gapsize, i))

390

gaps.append((gapsize, i))

391

392

prevend = revstart + revlen

392

prevend = revstart + revlen

393

# sort the gaps to pop them from largest to small

393

# sort the gaps to pop them from largest to small

394

gaps.sort()

394

gaps.sort()

395

396

# Collect the indices of the largest holes until the density is acceptable

396

# Collect the indices of the largest holes until the density is acceptable

397

selected = []

397

selected = []

398

while gaps and density < targetdensity:

398

while gaps and density < targetdensity:

399

gapsize, gapidx = gaps.pop()

399

gapsize, gapidx = gaps.pop()

400

401

selected.append(gapidx)

401

selected.append(gapidx)

402

403

# the gap sizes are stored as negatives to be sorted decreasingly

403

# the gap sizes are stored as negatives to be sorted decreasingly

404

# by the heap

404

# by the heap

405

readdata -= gapsize

405

readdata -= gapsize

406

if readdata > 0:

406

if readdata > 0:

407

density = chainpayload / float(readdata)

407

density = chainpayload / float(readdata)

408

else:

408

else:

409

density = 1.0

409

density = 1.0

410

selected.sort()

410

selected.sort()

411

412

# Cut the revs at collected indices

412

# Cut the revs at collected indices

413

previdx = 0

413

previdx = 0

414

for idx in selected:

414

for idx in selected:

415

416

chunk = _trimchunk(revlog, revs, previdx, idx)

416

chunk = _trimchunk(revlog, revs, previdx, idx)

417

if chunk:

417

if chunk:

418

yield chunk

418

yield chunk

419

420

previdx = idx

420

previdx = idx

421

422

chunk = _trimchunk(revlog, revs, previdx)

422

chunk = _trimchunk(revlog, revs, previdx)

423

if chunk:

423

if chunk:

424

yield chunk

424

yield chunk

425

426

def _trimchunk(revlog, revs, startidx, endidx=None):

426

def _trimchunk(revlog, revs, startidx, endidx=None):

427

"""returns revs[startidx:endidx] without empty trailing revs

427

"""returns revs[startidx:endidx] without empty trailing revs

428

429

Doctest Setup

429

Doctest Setup

430

>>> revlog = _testrevlog([

430

>>> revlog = _testrevlog([

431

... 5, #0

431

... 5, #0

432

... 10, #1

432

... 10, #1

433

... 12, #2

433

... 12, #2

434

... 12, #3 (empty)

434

... 12, #3 (empty)

435

... 17, #4

435

... 17, #4

436

... 21, #5

436

... 21, #5

437

... 21, #6 (empty)

437

... 21, #6 (empty)

438

... ])

438

... ])

439

440

Contiguous cases:

440

Contiguous cases:

441

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

441

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

442

[0, 1, 2, 3, 4, 5]

442

[0, 1, 2, 3, 4, 5]

443

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

443

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

444

[0, 1, 2, 3, 4]

444

[0, 1, 2, 3, 4]

445

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

445

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

446

[0, 1, 2]

446

[0, 1, 2]

447

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

447

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

448

[2]

448

[2]

449

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

449

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

450

[3, 4, 5]

450

[3, 4, 5]

451

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

451

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

452

[3, 4]

452

[3, 4]

453

454

Discontiguous cases:

454

Discontiguous cases:

455

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

455

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

456

[1, 3, 5]

456

[1, 3, 5]

457

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

457

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

458

[1]

458

[1]

459

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

459

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

460

[3, 5]

460

[3, 5]

461

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

461

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

462

[3, 5]

462

[3, 5]

463

"""

463

"""

464

length = revlog.length

464

length = revlog.length

465

466

if endidx is None:

466

if endidx is None:

467

endidx = len(revs)

467

endidx = len(revs)

468

469

# If we have a non-emtpy delta candidate, there are nothing to trim

469

# If we have a non-emtpy delta candidate, there are nothing to trim

470

if revs[endidx - 1] < len(revlog):

470

if revs[endidx - 1] < len(revlog):

471

# Trim empty revs at the end, except the very first revision of a chain

471

# Trim empty revs at the end, except the very first revision of a chain

472

while (endidx > 1

472

while (endidx > 1

473

and endidx > startidx

473

and endidx > startidx

474

and length(revs[endidx - 1]) == 0):

474

and length(revs[endidx - 1]) == 0):

475

endidx -= 1

475

endidx -= 1

476

477

return revs[startidx:endidx]

477

return revs[startidx:endidx]

478

479

def segmentspan(revlog, revs):
    """Return how many bytes separate the start and the end of ``revs``.

    ``revs`` is a sorted sequence of revision numbers. The result is the
    end offset of the last revision minus the start offset of the first
    one, holes included. An empty ``revs`` spans 0 bytes.

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if revs:
        first, last = revs[0], revs[-1]
        return revlog.end(last) - revlog.start(first)
    return 0

507

508

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is forwarded to ``revlog.revision`` (as ``_df``) when the base
    text has to be read. ``flags`` are the revision flags, run through the
    revlog 'read' flag processors; when those processors request it, the
    resulting text is checked against ``expectednode`` using ``p1``/``p2``.

    Raises ``error.StorageError`` if ``flags`` claims the revision is
    censored but processing did not report it as such, and lets
    ``error.CensoredNodeError`` propagate if the content is censored while
    ``flags`` does not say so.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # flagged as censored, yet nothing complained above
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

536

537

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing one candidate delta"""
    # distance from the base revision (bounds the amount of I/O needed)
    distance = attr.ib()
    # size of this delta, compared against the text length when validating
    deltalen = attr.ib()
    data = attr.ib()
    # revision the delta applies to
    base = attr.ib()
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # sum of the total size of the deltas that need to be applied
    compresseddeltalen = attr.ib()
    # None when the delta is not a snapshot candidate
    snapshotdepth = attr.ib()

547

548

def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` may be None, in which case the answer is False.
    ``revinfo`` only needs to expose ``textlen``.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (revlog._maxchainlen
        and revlog._maxchainlen < deltainfo.chainlen):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True

616

617

# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500

621

622

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

622

def _candidategroups(revlog, textlen, p1, p2, cachedelta):

623

"""Provides group of revision to be tested as delta base

623

"""Provides group of revision to be tested as delta base

624

625

This top level function focus on emitting groups with unique and worthwhile

625

This top level function focus on emitting groups with unique and worthwhile

626

content. See _raw_candidate_groups for details about the group order.

626

content. See _raw_candidate_groups for details about the group order.

627

"""

627

"""

628

# should we try to build a delta?

628

# should we try to build a delta?

629

if not (len(revlog) and revlog._storedeltachains):

629

if not (len(revlog) and revlog._storedeltachains):

630

yield None

630

yield None

631

return

631

return

632

633

deltalength = revlog.length

633

deltalength = revlog.length

634

deltaparent = revlog.deltaparent

634

deltaparent = revlog.deltaparent

635

sparse = revlog._sparserevlog

635

sparse = revlog._sparserevlog

636

good = None

636

good = None

637

638

deltas_limit = textlen * LIMIT_DELTA2TEXT

638

deltas_limit = textlen * LIMIT_DELTA2TEXT

639

640

tested = {nullrev}

640

tested = {nullrev}

641

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

641

candidates = _refinedgroups(revlog, p1, p2, cachedelta)

642

while True:

642

while True:

643

temptative = candidates.send(good)

643

temptative = candidates.send(good)

644

if temptative is None:

644

if temptative is None:

645

break

645

break

646

group = []

646

group = []

647

for rev in temptative:

647

for rev in temptative:

648

# skip over empty delta (no need to include them in a chain)

648

# skip over empty delta (no need to include them in a chain)

649

while (revlog._generaldelta

649

while (revlog._generaldelta

650

and not (rev == nullrev

650

and not (rev == nullrev

651

or rev in tested

651

or rev in tested

652

or deltalength(rev))):

652

or deltalength(rev))):

653

tested.add(rev)

653

tested.add(rev)

654

rev = deltaparent(rev)

654

rev = deltaparent(rev)

655

# no need to try a delta against nullrev, this will be done as a

655

# no need to try a delta against nullrev, this will be done as a

656

# last resort.

656

# last resort.

657

if rev == nullrev:

657

if rev == nullrev:

658

continue

658

continue

659

# filter out revision we tested already

659

# filter out revision we tested already

660

if rev in tested:

660

if rev in tested:

661

continue

661

continue

662

tested.add(rev)

662

tested.add(rev)

663

# filter out delta base that will never produce good delta

663

# filter out delta base that will never produce good delta

664

if deltas_limit < revlog.length(rev):

664

if deltas_limit < revlog.length(rev):

665

continue

665

continue

666

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

666

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

667

continue

667

continue

668

# no delta for rawtext-changing revs (see "candelta" for why)

668

# no delta for rawtext-changing revs (see "candelta" for why)

669

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

669

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

670

continue

670

continue

671

# If we reach here, we are about to build and test a delta.

671

# If we reach here, we are about to build and test a delta.

672

# The delta building process will compute the chaininfo in all

672

# The delta building process will compute the chaininfo in all

673

# case, since that computation is cached, it is fine to access it

673

# case, since that computation is cached, it is fine to access it

674

# here too.

674

# here too.

675

chainlen, chainsize = revlog._chaininfo(rev)

675

chainlen, chainsize = revlog._chaininfo(rev)

676

# if chain will be too long, skip base

676

# if chain will be too long, skip base

677

if revlog._maxchainlen and chainlen >= revlog._maxchainlen:

677

if revlog._maxchainlen and chainlen >= revlog._maxchainlen:

678

continue

678

continue

679

# if chain already have too much data, skip base

679

# if chain already have too much data, skip base

680

if deltas_limit < chainsize:

680

if deltas_limit < chainsize:

681

continue

681

continue

682

if sparse and revlog.upperboundcomp is not None:

682

if sparse and revlog.upperboundcomp is not None:

683

maxcomp = revlog.upperboundcomp

683

maxcomp = revlog.upperboundcomp

684

basenotsnap = (p1, p2, nullrev)

684

basenotsnap = (p1, p2, nullrev)

685

if rev not in basenotsnap and revlog.issnapshot(rev):

685

if rev not in basenotsnap and revlog.issnapshot(rev):

686

snapshotdepth = revlog.snapshotdepth(rev)

686

snapshotdepth = revlog.snapshotdepth(rev)

687

# If text is significantly larger than the base, we can

687

# If text is significantly larger than the base, we can

688

# expect the resulting delta to be proportional to the size

688

# expect the resulting delta to be proportional to the size

689

# difference

689

# difference

690

revsize = revlog.rawsize(rev)

690

revsize = revlog.rawsize(rev)

691

rawsizedistance = max(textlen - revsize, 0)

691

rawsizedistance = max(textlen - revsize, 0)

692

# use an estimate of the compression upper bound.

692

# use an estimate of the compression upper bound.

693

lowestrealisticdeltalen = rawsizedistance // maxcomp

693

lowestrealisticdeltalen = rawsizedistance // maxcomp

694

695

# check the absolute constraint on the delta size

695

# check the absolute constraint on the delta size

696

snapshotlimit = textlen >> snapshotdepth

696

snapshotlimit = textlen >> snapshotdepth

697

if snapshotlimit < lowestrealisticdeltalen:

697

if snapshotlimit < lowestrealisticdeltalen:

698

# delta lower bound is larger than accepted upper bound

698

# delta lower bound is larger than accepted upper bound

699

continue

699

continue

700

701

# check the relative constraint on the delta size

701

# check the relative constraint on the delta size

702

revlength = revlog.length(rev)

702

revlength = revlog.length(rev)

703

if revlength < lowestrealisticdeltalen:

703

if revlength < lowestrealisticdeltalen:

704

# delta probable lower bound is larger than target base

704

# delta probable lower bound is larger than target base

705

continue

705

continue

706

707

group.append(rev)

707

group.append(rev)

708

if group:

708

if group:

709

# XXX: in the sparse revlog case, group can become large,

709

# XXX: in the sparse revlog case, group can become large,

710

# impacting performances. Some bounding or slicing mecanism

710

# impacting performances. Some bounding or slicing mecanism

711

# would help to reduce this impact.

711

# would help to reduce this impact.

712

good = yield tuple(group)

712

good = yield tuple(group)

713

yield None

713

yield None

714

715

def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip

    ``cache`` is filled in place: every snapshot revision found is appended
    to the list stored under its delta parent, so ``cache`` is expected to
    create missing entries on access (callers in this module pass a
    ``collections.defaultdict(list)``).
    """
    if util.safehasattr(revlog.index, 'findsnapshots'):
        # the index offers its own implementation, delegate to it
        revlog.index.findsnapshots(cache, start_rev)
    else:
        deltaparent = revlog.deltaparent
        issnapshot = revlog.issnapshot
        for rev in revlog.revs(start_rev):
            if issnapshot(rev):
                cache[deltaparent(rev)].append(rev)

725

726

def _refinedgroups(revlog, p1, p2, cachedelta):

726

def _refinedgroups(revlog, p1, p2, cachedelta):

727

good = None

727

good = None

728

# First we try to reuse a the delta contained in the bundle.

728

# First we try to reuse a the delta contained in the bundle.

729

# (or from the source revlog)

729

# (or from the source revlog)

730

#

730

#

731

# This logic only applies to general delta repositories and can be disabled

731

# This logic only applies to general delta repositories and can be disabled

732

# through configuration. Disabling reuse source delta is useful when

732

# through configuration. Disabling reuse source delta is useful when

733

# we want to make sure we recomputed "optimal" deltas.

733

# we want to make sure we recomputed "optimal" deltas.

734

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

734

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

735

# Assume what we received from the server is a good choice

735

# Assume what we received from the server is a good choice

736

# build delta will reuse the cache

736

# build delta will reuse the cache

737

good = yield (cachedelta[0],)

737

good = yield (cachedelta[0],)

738

if good is not None:

738

if good is not None:

739

yield None

739

yield None

740

return

740

return

741

snapshots = collections.defaultdict(list)

741

snapshots = collections.defaultdict(list)

742

for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):

742

for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):

743

good = yield candidates

743

good = yield candidates

744

if good is not None:

744

if good is not None:

745

break

745

break

746

747

# If sparse revlog is enabled, we can try to refine the available deltas

747

# If sparse revlog is enabled, we can try to refine the available deltas

748

if not revlog._sparserevlog:

748

if not revlog._sparserevlog:

749

yield None

749

yield None

750

return

750

return

751

752

# if we have a refinable value, try to refine it

752

# if we have a refinable value, try to refine it

753

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

753

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

754

# refine snapshot down

754

# refine snapshot down

755

previous = None

755

previous = None

756

while previous != good:

756

while previous != good:

757

previous = good

757

previous = good

758

base = revlog.deltaparent(good)

758

base = revlog.deltaparent(good)

759

if base == nullrev:

759

if base == nullrev:

760

break

760

break

761

good = yield (base,)

761

good = yield (base,)

762

# refine snapshot up

762

# refine snapshot up

763

if not snapshots:

763

if not snapshots:

764

_findsnapshots(revlog, snapshots, good + 1)

764

_findsnapshots(revlog, snapshots, good + 1)

765

previous = None

765

previous = None

766

while good != previous:

766

while good != previous:

767

previous = good

767

previous = good

768

children = tuple(sorted(c for c in snapshots[good]))

768

children = tuple(sorted(c for c in snapshots[good]))

769

good = yield children

769

good = yield children

770

771

# we have found nothing

771

# we have found nothing

772

yield None

772

yield None

773

774

def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):

774

def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):

775

"""Provides group of revision to be tested as delta base

775

"""Provides group of revision to be tested as delta base

776

777

This lower level function focus on emitting delta theorically interresting

777

This lower level function focus on emitting delta theorically interresting

778

without looking it any practical details.

778

without looking it any practical details.

779

780

The group order aims at providing fast or small candidates first.

780

The group order aims at providing fast or small candidates first.

781

"""

781

"""

782

gdelta = revlog._generaldelta

782

gdelta = revlog._generaldelta

783

# gate sparse behind general-delta because of issue6056

783

# gate sparse behind general-delta because of issue6056

784

sparse = gdelta and revlog._sparserevlog

784

sparse = gdelta and revlog._sparserevlog

785

curr = len(revlog)

785

curr = len(revlog)

786

prev = curr - 1

786

prev = curr - 1

787

deltachain = lambda rev: revlog._deltachain(rev)[0]

787

deltachain = lambda rev: revlog._deltachain(rev)[0]

788

789

if gdelta:

789

if gdelta:

790

# exclude already lazy tested base if any

790

# exclude already lazy tested base if any

791

parents = [p for p in (p1, p2) if p != nullrev]

791

parents = [p for p in (p1, p2) if p != nullrev]

792

793

if not revlog._deltabothparents and len(parents) == 2:

793

if not revlog._deltabothparents and len(parents) == 2:

794

parents.sort()

794

parents.sort()

795

# To minimize the chance of having to build a fulltext,

795

# To minimize the chance of having to build a fulltext,

796

# pick first whichever parent is closest to us (max rev)

796

# pick first whichever parent is closest to us (max rev)

797

yield (parents[1],)

797

yield (parents[1],)

798

# then the other one (min rev) if the first did not fit

798

# then the other one (min rev) if the first did not fit

799

yield (parents[0],)

799

yield (parents[0],)

800

elif len(parents) > 0:

800

elif len(parents) > 0:

801

# Test all parents (1 or 2), and keep the best candidate

801

# Test all parents (1 or 2), and keep the best candidate

802

yield parents

802

yield parents

803

804

if sparse and parents:

804

if sparse and parents:

805

if snapshots is None:

805

if snapshots is None:

806

# map: base-rev: snapshot-rev

806

# map: base-rev: snapshot-rev

807

snapshots = collections.defaultdict(list)

807

snapshots = collections.defaultdict(list)

808

# See if we can use an existing snapshot in the parent chains to use as

808

# See if we can use an existing snapshot in the parent chains to use as

809

# a base for a new intermediate-snapshot

809

# a base for a new intermediate-snapshot

810

#

810

#

811

# search for snapshot in parents delta chain

811

# search for snapshot in parents delta chain

812

# map: snapshot-level: snapshot-rev

812

# map: snapshot-level: snapshot-rev

813

parents_snaps = collections.defaultdict(set)

813

parents_snaps = collections.defaultdict(set)

814

candidate_chains = [deltachain(p) for p in parents]

814

candidate_chains = [deltachain(p) for p in parents]

815

for chain in candidate_chains:

815

for chain in candidate_chains:

816

for idx, s in enumerate(chain):

816

for idx, s in enumerate(chain):

817

if not revlog.issnapshot(s):

817

if not revlog.issnapshot(s):

818

break

818

break

819

parents_snaps[idx].add(s)

819

parents_snaps[idx].add(s)

820

snapfloor = min(parents_snaps[0]) + 1

820

snapfloor = min(parents_snaps[0]) + 1

821

_findsnapshots(revlog, snapshots, snapfloor)

821

_findsnapshots(revlog, snapshots, snapfloor)

822

# search for the highest "unrelated" revision

822

# search for the highest "unrelated" revision

823

#

823

#

824

# Adding snapshots used by "unrelated" revision increase the odd we

824

# Adding snapshots used by "unrelated" revision increase the odd we

825

# reuse an independant, yet better snapshot chain.

825

# reuse an independant, yet better snapshot chain.

826

#

826

#

827

# XXX instead of building a set of revisions, we could lazily enumerate

827

# XXX instead of building a set of revisions, we could lazily enumerate

828

# over the chains. That would be more efficient, however we stick to

828

# over the chains. That would be more efficient, however we stick to

829

# simple code for now.

829

# simple code for now.

830

all_revs = set()

830

all_revs = set()

831

for chain in candidate_chains:

831

for chain in candidate_chains:

832

all_revs.update(chain)

832

all_revs.update(chain)

833

other = None

833

other = None

834

for r in revlog.revs(prev, snapfloor):

834

for r in revlog.revs(prev, snapfloor):

835

if r not in all_revs:

835

if r not in all_revs:

836

other = r

836

other = r

837

break

837

break

838

if other is not None:

838

if other is not None:

839

# To avoid unfair competition, we won't use unrelated intermediate

839

# To avoid unfair competition, we won't use unrelated intermediate

840

# snapshot that are deeper than the ones from the parent delta

840

# snapshot that are deeper than the ones from the parent delta

841

# chain.

841

# chain.

842

max_depth = max(parents_snaps.keys())

842

max_depth = max(parents_snaps.keys())

843

chain = deltachain(other)

843

chain = deltachain(other)

844

for idx, s in enumerate(chain):

844

for idx, s in enumerate(chain):

845

if s < snapfloor:

845

if s < snapfloor:

846

continue

846

continue

847

if max_depth < idx:

847

if max_depth < idx:

848

break

848

break

849

if not revlog.issnapshot(s):

849

if not revlog.issnapshot(s):

850

break

850

break

851

parents_snaps[idx].add(s)

851

parents_snaps[idx].add(s)

852

# Test them as possible intermediate snapshot base

852

# Test them as possible intermediate snapshot base

853

# We test them from highest to lowest level. High level one are more

853

# We test them from highest to lowest level. High level one are more

854

# likely to result in small delta

854

# likely to result in small delta

855

floor = None

855

floor = None

856

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

856

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

857

siblings = set()

857

siblings = set()

858

for s in snaps:

858

for s in snaps:

859

siblings.update(snapshots[s])

859

siblings.update(snapshots[s])

860

# Before considering making a new intermediate snapshot, we check

860

# Before considering making a new intermediate snapshot, we check

861

# if an existing snapshot, children of base we consider, would be

861

# if an existing snapshot, children of base we consider, would be

862

# suitable.

862

# suitable.

863

#

863

#

864

# It give a change to reuse a delta chain "unrelated" to the

864

# It give a change to reuse a delta chain "unrelated" to the

865

# current revision instead of starting our own. Without such

865

# current revision instead of starting our own. Without such

866

# re-use, topological branches would keep reopening new chains.

866

# re-use, topological branches would keep reopening new chains.

867

# Creating more and more snapshot as the repository grow.

867

# Creating more and more snapshot as the repository grow.

868

869

if floor is not None:

869

if floor is not None:

870

# We only do this for siblings created after the one in our

870

# We only do this for siblings created after the one in our

871

# parent's delta chain. Those created before has less chances

871

# parent's delta chain. Those created before has less chances

872

# to be valid base since our ancestors had to create a new

872

# to be valid base since our ancestors had to create a new

873

# snapshot.

873

# snapshot.

874

siblings = [r for r in siblings if floor < r]

874

siblings = [r for r in siblings if floor < r]

875

yield tuple(sorted(siblings))

875

yield tuple(sorted(siblings))

876

# then test the base from our parent's delta chain.

876

# then test the base from our parent's delta chain.

877

yield tuple(sorted(snaps))

877

yield tuple(sorted(snaps))

878

floor = min(snaps)

878

floor = min(snaps)

879

# No suitable base found in the parent chain, search if any full

879

# No suitable base found in the parent chain, search if any full

880

# snapshots emitted since parent's base would be a suitable base for an

880

# snapshots emitted since parent's base would be a suitable base for an

881

# intermediate snapshot.

881

# intermediate snapshot.

882

#

882

#

883

# It give a chance to reuse a delta chain unrelated to the current

883

# It give a chance to reuse a delta chain unrelated to the current

884

# revisions instead of starting our own. Without such re-use,

884

# revisions instead of starting our own. Without such re-use,

885

# topological branches would keep reopening new full chains. Creating

885

# topological branches would keep reopening new full chains. Creating

886

# more and more snapshot as the repository grow.

886

# more and more snapshot as the repository grow.

887

yield tuple(snapshots[nullrev])

887

yield tuple(snapshots[nullrev])

888

889

if not sparse:

889

if not sparse:

890

# other approach failed try against prev to hopefully save us a

890

# other approach failed try against prev to hopefully save us a

891

# fulltext.

891

# fulltext.

892

yield (prev,)

892

yield (prev,)

893

894

class deltacomputer(object):

894

class deltacomputer(object):

895

def __init__(self, revlog):

895

def __init__(self, revlog):

896

self.revlog = revlog

896

self.revlog = revlog

897

898

def buildtext(self, revinfo, fh):

898

def buildtext(self, revinfo, fh):

899

"""Builds a fulltext version of a revision

899

"""Builds a fulltext version of a revision

900

901

revinfo: _revisioninfo instance that contains all needed info

901

revinfo: _revisioninfo instance that contains all needed info

902

fh: file handle to either the .i or the .d revlog file,

902

fh: file handle to either the .i or the .d revlog file,

903

depending on whether it is inlined or not

903

depending on whether it is inlined or not

904

"""

904

"""

905

btext = revinfo.btext

905

btext = revinfo.btext

906

if btext[0] is not None:

906

if btext[0] is not None:

907

return btext[0]

907

return btext[0]

908

909

revlog = self.revlog

909

revlog = self.revlog

910

cachedelta = revinfo.cachedelta

910

cachedelta = revinfo.cachedelta

911

baserev = cachedelta[0]

911

baserev = cachedelta[0]

912

delta = cachedelta[1]

912

delta = cachedelta[1]

913

914

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

914

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

915

revinfo.p1, revinfo.p2,

915

revinfo.p1, revinfo.p2,

916

revinfo.flags, revinfo.node)

916

revinfo.flags, revinfo.node)

917

return fulltext

917

return fulltext

918

919

def _builddeltadiff(self, base, revinfo, fh):

919

def _builddeltadiff(self, base, revinfo, fh):

920

revlog = self.revlog

920

revlog = self.revlog

921

t = self.buildtext(revinfo, fh)

921

t = self.buildtext(revinfo, fh)

922

if revlog.iscensored(base):

922

if revlog.iscensored(base):

923

# deltas based on a censored revision must replace the

923

# deltas based on a censored revision must replace the

924

# full content in one patch, so delta works everywhere

924

# full content in one patch, so delta works everywhere

925

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

925

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

926

delta = header + t

926

delta = header + t

927

else:

927

else:

928

ptext = revlog.revision(base, _df=fh, raw=True)

928

ptext = revlog.revision(base, _df=fh, raw=True)

929

delta = mdiff.textdiff(ptext, t)

929

delta = mdiff.textdiff(ptext, t)

930

931

return delta

931

return delta

932

933

def _builddeltainfo(self, revinfo, base, fh):

933

def _builddeltainfo(self, revinfo, base, fh):

934

# can we use the cached delta?

934

# can we use the cached delta?

935

revlog = self.revlog

935

revlog = self.revlog

936

chainbase = revlog.chainbase(base)

936

chainbase = revlog.chainbase(base)

937

if revlog._generaldelta:

937

if revlog._generaldelta:

938

deltabase = base

938

deltabase = base

939

else:

939

else:

940

deltabase = chainbase

940

deltabase = chainbase

941

snapshotdepth = None

941

snapshotdepth = None

942

if revlog._sparserevlog and deltabase == nullrev:

942

if revlog._sparserevlog and deltabase == nullrev:

943

snapshotdepth = 0

943

snapshotdepth = 0

944

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

944

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

945

# A delta chain should always be one full snapshot,

945

# A delta chain should always be one full snapshot,

946

# zero or more semi-snapshots, and zero or more deltas

946

# zero or more semi-snapshots, and zero or more deltas

947

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

947

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

948

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

948

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

949

snapshotdepth = len(revlog._deltachain(deltabase)[0])

949

snapshotdepth = len(revlog._deltachain(deltabase)[0])

950

delta = None

950

delta = None

951

if revinfo.cachedelta:

951

if revinfo.cachedelta:

952

cachebase, cachediff = revinfo.cachedelta

952

cachebase, cachediff = revinfo.cachedelta

953

#check if the diff still apply

953

#check if the diff still apply

954

currentbase = cachebase

954

currentbase = cachebase

955

while (currentbase != nullrev

955

while (currentbase != nullrev

956

and currentbase != base

956

and currentbase != base

957

and self.revlog.length(currentbase) == 0):

957

and self.revlog.length(currentbase) == 0):

958

currentbase = self.revlog.deltaparent(currentbase)

958

currentbase = self.revlog.deltaparent(currentbase)

959

if self.revlog._lazydelta and currentbase == base:

959

if self.revlog._lazydelta and currentbase == base:

960

delta = revinfo.cachedelta[1]

960

delta = revinfo.cachedelta[1]

961

if delta is None:

961

if delta is None:

962

delta = self._builddeltadiff(base, revinfo, fh)

962

delta = self._builddeltadiff(base, revinfo, fh)

963

# snapshotdept need to be neither None nor 0 level snapshot

963

# snapshotdept need to be neither None nor 0 level snapshot

964

if revlog.upperboundcomp is not None and snapshotdepth:

964

if revlog.upperboundcomp is not None and snapshotdepth:

965

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

965

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

966

snapshotlimit = revinfo.textlen >> snapshotdepth

966

snapshotlimit = revinfo.textlen >> snapshotdepth

967

if snapshotlimit < lowestrealisticdeltalen:

967

if snapshotlimit < lowestrealisticdeltalen:

968

return None

968

return None

969

if revlog.length(base) < lowestrealisticdeltalen:

970

return None

969

header, data = revlog.compress(delta)

971

header, data = revlog.compress(delta)

970

deltalen = len(header) + len(data)

972

deltalen = len(header) + len(data)

971

offset = revlog.end(len(revlog) - 1)

973

offset = revlog.end(len(revlog) - 1)

972

dist = deltalen + offset - revlog.start(chainbase)

974

dist = deltalen + offset - revlog.start(chainbase)

973

chainlen, compresseddeltalen = revlog._chaininfo(base)

975

chainlen, compresseddeltalen = revlog._chaininfo(base)

974

chainlen += 1

976

chainlen += 1

975

compresseddeltalen += deltalen

977

compresseddeltalen += deltalen

976

978

977

return _deltainfo(dist, deltalen, (header, data), deltabase,

979

return _deltainfo(dist, deltalen, (header, data), deltabase,

978

chainbase, chainlen, compresseddeltalen,

980

chainbase, chainlen, compresseddeltalen,

979

snapshotdepth)

981

snapshotdepth)

980

982

981

def _fullsnapshotinfo(self, fh, revinfo):

983

def _fullsnapshotinfo(self, fh, revinfo):

982

curr = len(self.revlog)

984

curr = len(self.revlog)

983

rawtext = self.buildtext(revinfo, fh)

985

rawtext = self.buildtext(revinfo, fh)

984

data = self.revlog.compress(rawtext)

986

data = self.revlog.compress(rawtext)

985

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

987

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

986

deltabase = chainbase = curr

988

deltabase = chainbase = curr

987

snapshotdepth = 0

989

snapshotdepth = 0

988

chainlen = 1

990

chainlen = 1

989

991

990

return _deltainfo(dist, deltalen, data, deltabase,

992

return _deltainfo(dist, deltalen, data, deltabase,

991

chainbase, chainlen, compresseddeltalen,

993

chainbase, chainlen, compresseddeltalen,

992

snapshotdepth)

994

snapshotdepth)

993

995

994

def finddeltainfo(self, revinfo, fh):

996

def finddeltainfo(self, revinfo, fh):

995

"""Find an acceptable delta against a candidate revision

997

"""Find an acceptable delta against a candidate revision

996

998

997

revinfo: information about the revision (instance of _revisioninfo)

999

revinfo: information about the revision (instance of _revisioninfo)

998

fh: file handle to either the .i or the .d revlog file,

1000

fh: file handle to either the .i or the .d revlog file,

999

depending on whether it is inlined or not

1001

depending on whether it is inlined or not

1000

1002

1001

Returns the first acceptable candidate revision, as ordered by

1003

Returns the first acceptable candidate revision, as ordered by

1002

_candidategroups

1004

_candidategroups

1003

1005

1004

If no suitable deltabase is found, we return delta info for a full

1006

If no suitable deltabase is found, we return delta info for a full

1005

snapshot.

1007

snapshot.

1006

"""

1008

"""

1007

if not revinfo.textlen:

1009

if not revinfo.textlen:

1008

return self._fullsnapshotinfo(fh, revinfo)

1010

return self._fullsnapshotinfo(fh, revinfo)

1009

1011

1010

# no delta for flag processor revision (see "candelta" for why)

1012

# no delta for flag processor revision (see "candelta" for why)

1011

# not calling candelta since only one revision needs test, also to

1013

# not calling candelta since only one revision needs test, also to

1012

# avoid overhead fetching flags again.

1014

# avoid overhead fetching flags again.

1013

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1015

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1014

return self._fullsnapshotinfo(fh, revinfo)

1016

return self._fullsnapshotinfo(fh, revinfo)

1015

1017

1016

cachedelta = revinfo.cachedelta

1018

cachedelta = revinfo.cachedelta

1017

p1 = revinfo.p1

1019

p1 = revinfo.p1

1018

p2 = revinfo.p2

1020

p2 = revinfo.p2

1019

revlog = self.revlog

1021

revlog = self.revlog

1020

1022

1021

deltainfo = None

1023

deltainfo = None

1022

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

1024

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

1023

groups = _candidategroups(self.revlog, revinfo.textlen,

1025

groups = _candidategroups(self.revlog, revinfo.textlen,

1024

p1r, p2r, cachedelta)

1026

p1r, p2r, cachedelta)

1025

candidaterevs = next(groups)

1027

candidaterevs = next(groups)

1026

while candidaterevs is not None:

1028

while candidaterevs is not None:

1027

nominateddeltas = []

1029

nominateddeltas = []

1028

if deltainfo is not None:

1030

if deltainfo is not None:

1029

# if we already found a good delta,

1031

# if we already found a good delta,

1030

# challenge it against refined candidates

1032

# challenge it against refined candidates

1031

nominateddeltas.append(deltainfo)

1033

nominateddeltas.append(deltainfo)

1032

for candidaterev in candidaterevs:

1034

for candidaterev in candidaterevs:

1033

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

1035

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

1034

if candidatedelta is not None:

1036

if candidatedelta is not None:

1035

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

1037

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

1036

nominateddeltas.append(candidatedelta)

1038

nominateddeltas.append(candidatedelta)

1037

if nominateddeltas:

1039

if nominateddeltas:

1038

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1040

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1039

if deltainfo is not None:

1041

if deltainfo is not None:

1040

candidaterevs = groups.send(deltainfo.base)

1042

candidaterevs = groups.send(deltainfo.base)

1041

else:

1043

else:

1042

candidaterevs = next(groups)

1044

candidaterevs = next(groups)

1043

1045

1044

if deltainfo is None:

1046

if deltainfo is None:

1045

deltainfo = self._fullsnapshotinfo(fh, revinfo)

1047

deltainfo = self._fullsnapshotinfo(fh, revinfo)

1046

return deltainfo

1048

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
             import collections
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
                 util,
             )
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """Bare-bones revlog stand-in used by the doctests of this module

                 Only the pieces the slicing helpers look at are provided: revision
                 offsets (``start``/``end``/``length``), the number of revisions, the
                 snapshot predicate and the two sparse-read tuning attributes.
                 """

                 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
                     """``data`` holds, for each revision, the offset where it ends

                     ``density`` and ``mingap`` are stored as ``_srdensitythreshold``
                     and ``_srmingapsize``; ``snapshot`` is the collection of revisions
                     reported as snapshots by ``issnapshot``.
                     """
                     self.index = None
                     self._data = data
                     self._snapshot = set(snapshot)
                     self._srmingapsize = mingap
                     self._srdensitythreshold = density

                 def start(self, rev):
                     """offset of the first byte of ``rev`` (0 for nullrev and rev 0)"""
                     if rev == nullrev or rev == 0:
                         return 0
                     # a revision starts where the previous one ends
                     return self._data[rev - 1]

                 def end(self, rev):
                     """offset right after the last byte of ``rev`` (0 for nullrev)"""
                     return 0 if rev == nullrev else self._data[rev]

                 def length(self, rev):
                     """number of bytes occupied by ``rev``"""
                     stop = self.end(rev)
                     return stop - self.start(rev)

                 def __len__(self):
                     """number of revisions described by ``data``"""
                     return len(self._data)

                 def issnapshot(self, rev):
                     """True for nullrev and for every revision listed as snapshot"""
                     return rev == nullrev or rev in self._snapshot
             def slicechunk(revlog, revs, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.

                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.

                 Slicing happens in two passes. The revisions are first cut until the
                 density (payload/chunks-span ratio) of each group is above
                 `revlog._srdensitythreshold`; no gap smaller than
                 `revlog._srmingapsize` is skipped. When the index object exposes a
                 `slicechunktodensity` attribute it is used for that pass, otherwise
                 the pure Python `_slicechunktodensity` is.

                 If `targetsize` is set, each group is then cut again so that no chunk
                 larger than `targetsize` is yielded. For consistency with the other
                 slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.

                 If individual revisions chunk are larger than this limit, they will
                 still be yielded on their own.

                 >>> data = [
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ]
                 >>> revlog = _testrevlog(data, snapshot=range(16))

                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]

                 Slicing with a maximum chunk size

                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]

                 Slicing involving nullrev

                 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
                 [[-1, 0], [11], [13, 15]]
                 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
                 [[-1], [13], [15]]
                 """
                 # the size cap is never allowed to drop below the minimal gap size
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)

                 # prefer the density slicer offered by the index, when there is one
                 slicer = getattr(revlog.index, 'slicechunktodensity', None)
                 if slicer is None:
                     def slicer(candidates, density, mingap):
                         return _slicechunktodensity(revlog, candidates, density,
                                                     mingap)

                 densechunks = slicer(revs,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize)
                 for densechunk in densechunks:
                     # second pass: enforce the (optional) size limit on each group
                     for piece in _slicechunktosize(revlog, densechunk, targetsize):
                         yield piece
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size

                 This is intended to be used on chunks that density slicing selected but
                 that are still too large compared to the read guarantee of revlog. This
                 might happen when "minimal gap size" interrupted the slicing or when
                 chains are built in a way that creates large blocks next to each other.

                 This is a generator of sub-lists of ``revs``. ``revs`` must not be
                 empty (its first and last items are read unconditionally) and
                 ``targetsize`` is either None (no limit, ``revs`` is yielded as is) or
                 a non-negative number of bytes.

                 >>> data = [
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ]

                 == All snapshots cases ==
                 >>> revlog = _testrevlog(data, snapshot=range(9))

                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]

                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]

                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]

                 == No Snapshot cases ==
                 >>> revlog = _testrevlog(data)

                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]

                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1], [2, 3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [4, 5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]

                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]

                 == mixed case ==
                 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
                 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
                 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 startdata = revlog.start(revs[0])
                 enddata = revlog.end(revs[-1])
                 fullspan = enddata - startdata
                 # nothing to do when there is no limit or the whole span already fits
                 if targetsize is None or fullspan <= targetsize:
                     yield revs
                     return

                 # [startrevidx:endrevidx] is the window of `revs` forming the chunk
                 # currently being built; startdata is the offset where it starts
                 startrevidx = 0
                 endrevidx = 1
                 iterrevs = enumerate(revs)
                 next(iterrevs) # skip first rev.
                 # first step: get snapshots out of the way
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     snapshot = revlog.issnapshot(r)
                     if span <= targetsize and snapshot:
                         # the snapshot still fits: extend the current chunk
                         endrevidx = idx + 1
                     else:
                         # emit what was gathered so far, restart a chunk at `r`
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx + 1
                     # the first non-snapshot revision ends this first step
                     if not snapshot:
                         break

                 # for the others, we use binary slicing to quickly converge toward valid
                 # chunks (otherwise, we might end up looking for start/end of many
                 # revisions). This logic is not looking for the perfect slicing point, it
                 # focuses on quickly converging toward valid chunks.
                 nbitem = len(revs)
                 while (enddata - startdata) > targetsize:
                     endrevidx = nbitem
                     if nbitem - startrevidx <= 1:
                         break # protect against individual chunk larger than limit
                     localenddata = revlog.end(revs[endrevidx - 1])
                     span = localenddata - startdata
                     # halve the window until it fits or holds a single revision
                     while span > targetsize:
                         if endrevidx - startrevidx <= 1:
                             break # protect against individual chunk larger than limit
                         endrevidx -= (endrevidx - startrevidx) // 2
                         localenddata = revlog.end(revs[endrevidx - 1])
                         span = localenddata - startdata
                     chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                     if chunk:
                         yield chunk
                     # continue with whatever lies after the chunk just emitted
                     startrevidx = endrevidx
                     startdata = revlog.start(revs[startrevidx])

                 # the remaining tail fits the limit (or is a single revision)
                 chunk = _trimchunk(revlog, revs, startrevidx)
                 if chunk:
                     yield chunk
             def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                                      mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.

                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.

                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.

                 This is a generator; ``revs`` is yielded untouched when it holds at
                 most one revision, when its span is below `mingapsize` or when it is
                 already dense enough.

                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])

                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 revstartof = revlog.start
                 revlengthof = revlog.length

                 # a single revision (or nothing at all) cannot be sliced further
                 if len(revs) <= 1:
                     yield revs
                     return

                 fullspan = segmentspan(revlog, revs)
                 if fullspan < mingapsize:
                     yield revs
                     return

                 payload = sum(revlengthof(r) for r in revs)
                 density = payload / float(fullspan) if fullspan else 1.0
                 if density >= targetdensity:
                     yield revs
                     return

                 # Gather every hole between two non-empty revisions as a
                 # (size, index of the revision following the hole) pair
                 holes = []
                 lastend = None
                 for pos, rev in enumerate(revs):
                     begin = revstartof(rev)
                     size = revlengthof(rev)

                     # Skip empty revisions to form larger holes
                     if size == 0:
                         continue

                     if lastend is not None:
                         hole = begin - lastend
                         # only consider holes that are large enough
                         if hole > mingapsize:
                             holes.append((hole, pos))

                     lastend = begin + size

                 # ascending order: the largest hole is the one popped first
                 holes.sort()

                 # Drop the largest holes from the read until the density is acceptable
                 span = fullspan
                 cuts = []
                 while holes and density < targetdensity:
                     holesize, holeidx = holes.pop()
                     cuts.append(holeidx)

                     # skipping the hole shrinks the amount of data actually read
                     span -= holesize
                     density = payload / float(span) if span > 0 else 1.0
                 cuts.sort()

                 # Cut the revs at collected indices
                 lower = 0
                 for upper in cuts:
                     piece = _trimchunk(revlog, revs, lower, upper)
                     if piece:
                         yield piece
                     lower = upper

                 tail = _trimchunk(revlog, revs, lower)
                 if tail:
                     yield tail
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs

                 ``endidx`` defaults to ``len(revs)``. Trailing revisions whose length
                 is zero are dropped from the slice; the item at index 0 of ``revs`` is
                 never trimmed away. Nothing is trimmed when the last selected revision
                 number is not smaller than ``len(revlog)``.

                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])

                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]

                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 revlengthof = revlog.length
                 stop = len(revs) if endidx is None else endidx

                 # If we have a non-empty delta candidate, there is nothing to trim
                 lastrev = revs[stop - 1]
                 if lastrev < len(revlog):
                     # Walk back over empty revs at the end, except the very first
                     # revision of a chain
                     while stop > 1 and stop > startidx:
                         if revlengthof(revs[stop - 1]) != 0:
                             break
                         stop -= 1

                 return revs[startidx:stop]
             def segmentspan(revlog, revs):
                 """Get the byte span of a segment of revisions

                 revs is a sorted array of revision numbers

                 The span runs from the start of the first revision to the end of the
                 last one, whatever lies in between. An empty ``revs`` spans 0 bytes.

                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])

                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 17
                 >>> segmentspan(revlog, [0, 4])
                 17
                 >>> segmentspan(revlog, [3, 4])
                 5
                 >>> segmentspan(revlog, [1, 2, 3,])
                 7
                 >>> segmentspan(revlog, [1, 3])
                 7
                 >>> segmentspan(revlog, [])
                 0
                 """
                 if not revs:
                     return 0
                 end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata

                 ``fh`` is handed to ``revlog.revision`` when the base text has to be
                 read. ``flags`` drives flag processing and ``expectednode``, ``p1`` and
                 ``p2`` are used for the hash check requested by the flag processors.

                 Raises ``error.StorageError`` when ``flags`` carries
                 ``REVIDX_ISCENSORED`` although processing the text did not report a
                 censored node. A ``error.CensoredNodeError`` is swallowed only when
                 that flag is set, and propagated otherwise.
                 """
                 headersize = struct.calcsize(">lll")

                 # A delta replacing the whole base is recognised from its header
                 # alone, so the base revision never needs decoding. This neatly
                 # avoids censored bases, which throw when they're decoded.
                 prefix = delta[:headersize]
                 replaceheader = mdiff.replacediffheader(revlog.rawsize(baserev),
                                                         len(delta) - headersize)
                 if prefix == replaceheader:
                     fulltext = delta[headersize:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     fulltext = mdiff.patch(basetext, delta)

                 try:
                     fulltext, validatehash = revlog._processflags(fulltext, flags,
                                                                   'read', raw=True)
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         # flagged as censored, yet the content went through fine
                         raise error.StorageError(_('node %s is not censored') %
                                                  expectednode)
                 except error.CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not (flags & REVIDX_ISCENSORED):
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 """Frozen record describing one candidate way of storing a revision

                 Instances are built positionally, in the order of the fields below,
                 either for a delta against an existing base or for a full snapshot.
                 """
                 # span of data involved when reading the chain: for a delta, the
                 # delta length plus the current end of the revlog minus the start of
                 # the chain base; for a full snapshot, the compressed length itself
                 distance = attr.ib()
                 # size of the compressed payload (compression header + data)
                 deltalen = attr.ib()
                 # the (header, data) pair produced by revlog.compress()
                 data = attr.ib()
                 # revision the delta is stored against; a full snapshot uses the
                 # number the new revision itself is about to get
                 base = attr.ib()
                 # first revision of the delta chain this revision would belong to
                 chainbase = attr.ib()
                 # number of revisions in the chain, this one included
                 chainlen = attr.ib()
                 # sum of the compressed sizes of the chain, this payload included
                 compresseddeltalen = attr.ib()
                 # None for a regular delta, 0 for a full snapshot, otherwise the
                 # depth of the intermediate snapshot
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Tell whether a candidate delta is acceptable for storage.

                 A delta is good when it stays within the disk span, disk size, chain
                 length and snapshot size bounds that we know to be performant.
                 ``None`` (no delta available) is never good.
                 """
                 if deltainfo is None:
                     return False
                 textlen = revinfo.textlen
                 # Read span
                 #
                 #   'deltainfo.distance' is the distance from the base revision;
                 #   bounding it limits the amount of I/O we need to do. Without a
                 #   configured limit the distance itself is used so the check always
                 #   passes, and the limit is never lower than four times the text.
                 #
                 #   In the sparse-revlog case, we rely on the associated "sparse
                 #   reading" to avoid issue related to the span of data. Pathological
                 #   revlogs are possible in theory but do not happen in practice, so
                 #   the span check is skipped entirely there.
                 spanlimit = max(revlog._maxdeltachainspan or deltainfo.distance,
                                 textlen * 4)
                 if not revlog._sparserevlog and deltainfo.distance > spanlimit:
                     return False
                 # New delta size
                 #
                 #   A delta larger than the target text is inefficient to store.
                 if deltainfo.deltalen > textlen:
                     return False
                 # Cumulated payload size
                 #
                 #   'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply; bounding it to K * target text length
                 #   limits the amount of CPU we consume.
                 if deltainfo.compresseddeltalen > textlen * LIMIT_DELTA2TEXT:
                     return False
                 # Chain length
                 #
                 #   The number of deltas in the chain must not get too high.
                 maxchainlen = revlog._maxchainlen
                 if maxchainlen and deltainfo.chainlen > maxchainlen:
                     return False
                 depth = deltainfo.snapshotdepth
                 # Intermediate snapshot size limit
                 #
                 #   The limit exists to prevent an endless chain of intermediate
                 #   deltas from being created.
                 if depth is not None and deltainfo.deltalen > (textlen >> depth):
                     return False
                 # A new intermediate snapshot must not be larger than the previous
                 # snapshot.
                 if depth and deltainfo.deltalen > revlog.length(deltainfo.base):
                     return False
                 return True
             # Maximum <revision-text-length>/<base-candidate-text-length> ratio.
             #
             # If a revision's full text is that many times bigger than a base
             # candidate's full text, it is very unlikely that the candidate will
             # produce a valid delta. Such candidates are no longer considered (this
             # filter is only applied for sparse revlogs, see _candidategroups).
             LIMIT_BASE2TEXT = 500
             def _candidategroups(revlog, textlen, p1, p2, cachedelta):
                 """Provides group of revision to be tested as delta base

                 This top level function focuses on emitting groups with unique and
                 worthwhile content. See _rawgroups for details about the group order.

                 This is a generator driven with ``send()``: each yielded value is a
                 non-empty tuple of candidate base revisions, and the caller sends back
                 the base it retained (or None); that value is forwarded to
                 _refinedgroups. The generator yields None once there is nothing left
                 to try, which is also the very first value when the revlog is empty or
                 does not store delta chains.

                 ``textlen`` is the length of the text to store, ``p1`` and ``p2`` are
                 compared against revision numbers, and ``cachedelta`` is passed through
                 to _refinedgroups.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     yield None
                     return
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 sparse = revlog._sparserevlog
                 # base retained by the caller for the previous group (None if none)
                 good = None
                 # upper bound on the amount of chain data worth applying
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
                 # revisions already considered; nullrev is handled as a last resort
                 tested = {nullrev}
                 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
                 while True:
                     # the first send(None) starts the underlying generator
                     temptative = candidates.send(good)
                     if temptative is None:
                         break
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while (revlog._generaldelta
                                and not (rev == nullrev
                                         or rev in tested
                                         or deltalength(rev))):
                             tested.add(rev)
                             rev = deltaparent(rev)
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         tested.add(rev)
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             continue
                         # base text too small compared to the new text (sparse only)
                         if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             continue
                         # If we reach here, we are about to build and test a delta.
                         # The delta building process will compute the chaininfo in all
                         # case, since that computation is cached, it is fine to access it
                         # here too.
                         chainlen, chainsize = revlog._chaininfo(rev)
                         # if chain will be too long, skip base
                         if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                             continue
                         # if chain already have too much data, skip base
                         if deltas_limit < chainsize:
                             continue
                         if sparse and revlog.upperboundcomp is not None:
                             maxcomp = revlog.upperboundcomp
                             basenotsnap = (p1, p2, nullrev)
                             if rev not in basenotsnap and revlog.issnapshot(rev):
                                 snapshotdepth = revlog.snapshotdepth(rev)
                                 # If text is significantly larger than the base, we can
                                 # expect the resulting delta to be proportional to the size
                                 # difference
                                 revsize = revlog.rawsize(rev)
                                 rawsizedistance = max(textlen - revsize, 0)
                                 # use an estimate of the compression upper bound.
                                 lowestrealisticdeltalen = rawsizedistance // maxcomp
                                 # check the absolute constraint on the delta size
                                 snapshotlimit = textlen >> snapshotdepth
                                 if snapshotlimit < lowestrealisticdeltalen:
                                     # delta lower bound is larger than accepted upper bound
                                     continue
                                 # check the relative constraint on the delta size
                                 revlength = revlog.length(rev)
                                 if revlength < lowestrealisticdeltalen:
                                     # delta probable lower bound is larger than target base
                                     continue
                         group.append(rev)
                     if group:
                         # XXX: in the sparse revlog case, group can become large,
                         #      impacting performances. Some bounding or slicing mechanism
                         #      would help to reduce this impact.
                         good = yield tuple(group)
                 yield None
             def _findsnapshots(revlog, cache, start_rev):
                 """Record the snapshots found from start_rev to tip into ``cache``.

                 ``cache`` maps a delta parent revision to the list of snapshot
                 revisions stored against it. The index's own ``findsnapshots``
                 implementation is used when it provides one.
                 """
                 index = revlog.index
                 if util.safehasattr(index, 'findsnapshots'):
                     index.findsnapshots(cache, start_rev)
                     return
                 # fallback: walk the revisions ourselves
                 for candidate in revlog.revs(start_rev):
                     if not revlog.issnapshot(candidate):
                         continue
                     cache[revlog.deltaparent(candidate)].append(candidate)
             def _refinedgroups(revlog, p1, p2, cachedelta):
                 """Yield groups of candidate bases, then try to refine the retained one.

                 This is a generator driven with ``send()``: after each yielded group
                 the caller sends back the base revision it retained, or None when no
                 candidate of the group was suitable. A yielded None means there is
                 nothing left to try.

                 The base of ``cachedelta`` is offered first (when lazy delta base reuse
                 applies), then the groups from _rawgroups until one is retained. For
                 sparse revlogs, a retained snapshot base that is not a parent is then
                 refined by offering its delta parent ("down") and afterwards the
                 snapshots stored against it ("up").
                 """
                 good = None
                 # First we try to reuse the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     good = yield (cachedelta[0],)
                     if good is not None:
                         yield None
                         return
                 # map: base-rev: snapshot-revs, shared with _rawgroups so that its
                 # snapshot search can be reused by the refinement below
                 snapshots = collections.defaultdict(list)
                 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
                     good = yield candidates
                     if good is not None:
                         break
                 # If sparse revlog is enabled, we can try to refine the available deltas
                 if not revlog._sparserevlog:
                     yield None
                     return
                 # if we have a refinable value, try to refine it
                 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
                     # refine snapshot down
                     # (stops when the caller keeps the same base or nullrev is reached)
                     previous = None
                     while previous != good:
                         previous = good
                         base = revlog.deltaparent(good)
                         if base == nullrev:
                             break
                         good = yield (base,)
                     # refine snapshot up
                     if not snapshots:
                         _findsnapshots(revlog, snapshots, good + 1)
                     previous = None
                     while good != previous:
                         previous = good
                         children = tuple(sorted(c for c in snapshots[good]))
                         good = yield children
                 # we have found nothing
                 yield None
             def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
                 """Provides group of revision to be tested as delta base

                 This lower level function focuses on emitting deltas that are
                 theoretically interesting without looking at any practical details.

                 The group order aims at providing fast or small candidates first.

                 ``p1`` and ``p2`` are compared against ``nullrev`` and used as revision
                 numbers. ``snapshots`` is an optional ``base-rev -> [snapshot-rev]``
                 mapping that gets filled through _findsnapshots; a fresh one is created
                 when it is None. ``cachedelta`` is not used by this function.
                 """
                 gdelta = revlog._generaldelta
                 # gate sparse behind general-delta because of issue6056
                 sparse = gdelta and revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 # only the first item of revlog._deltachain()'s result is used
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 # 'parents' is always bound here: sparse implies gdelta
                 if sparse and parents:
                     if snapshots is None:
                         # map: base-rev: snapshot-rev
                         snapshots = collections.defaultdict(list)
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     #
                     # search for snapshot in parents delta chain
                     # map: snapshot-level: snapshot-rev
                     parents_snaps = collections.defaultdict(set)
                     candidate_chains = [deltachain(p) for p in parents]
                     for chain in candidate_chains:
                         for idx, s in enumerate(chain):
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     snapfloor = min(parents_snaps[0]) + 1
                     _findsnapshots(revlog, snapshots, snapfloor)
                     # search for the highest "unrelated" revision
                     #
                     # Adding snapshots used by "unrelated" revision increase the odds we
                     # reuse an independent, yet better snapshot chain.
                     #
                     # XXX instead of building a set of revisions, we could lazily enumerate
                     # over the chains. That would be more efficient, however we stick to
                     # simple code for now.
                     all_revs = set()
                     for chain in candidate_chains:
                         all_revs.update(chain)
                     other = None
                     for r in revlog.revs(prev, snapfloor):
                         if r not in all_revs:
                             other = r
                             break
                     if other is not None:
                         # To avoid unfair competition, we won't use unrelated intermediate
                         # snapshot that are deeper than the ones from the parent delta
                         # chain.
                         max_depth = max(parents_snaps.keys())
                         chain = deltachain(other)
                         for idx, s in enumerate(chain):
                             if s < snapfloor:
                                 continue
                             if max_depth < idx:
                                 break
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     # Test them as possible intermediate snapshot base
                     # We test them from highest to lowest level. High level one are more
                     # likely to result in small delta
                     floor = None
                     for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                         siblings = set()
                         for s in snaps:
                             siblings.update(snapshots[s])
                         # Before considering making a new intermediate snapshot, we check
                         # if an existing snapshot, children of base we consider, would be
                         # suitable.
                         #
                         # It gives a chance to reuse a delta chain "unrelated" to the
                         # current revision instead of starting our own. Without such
                         # re-use, topological branches would keep reopening new chains.
                         # Creating more and more snapshot as the repository grow.
                         if floor is not None:
                             # We only do this for siblings created after the one in our
                             # parent's delta chain. Those created before has less chances
                             # to be valid base since our ancestors had to create a new
                             # snapshot.
                             siblings = [r for r in siblings if floor < r]
                         yield tuple(sorted(siblings))
                         # then test the base from our parent's delta chain.
                         yield tuple(sorted(snaps))
                         floor = min(snaps)
                     # No suitable base found in the parent chain, search if any full
                     # snapshots emitted since parent's base would be a suitable base for an
                     # intermediate snapshot.
                     #
                     # It gives a chance to reuse a delta chain unrelated to the current
                     # revisions instead of starting our own. Without such re-use,
                     # topological branches would keep reopening new full chains. Creating
                     # more and more snapshot as the repository grow.
                     yield tuple(snapshots[nullrev])
                 if not sparse:
                     # other approach failed try against prev to hopefully save us a
                     # fulltext.
                     yield (prev,)
             class deltacomputer(object):
                 """Compute how a new revision should be stored in a revlog.

                 The result is a ``_deltainfo`` describing either a delta against an
                 existing revision or a full snapshot. The instance only keeps a
                 reference to the revlog it works on.
                 """

                 def __init__(self, revlog):
                     self.revlog = revlog

                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision

                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not

                     The text is memoized in ``revinfo.btext[0]``. When that slot is
                     still None, the text is rebuilt from ``revinfo.cachedelta`` (a
                     ``(baserev, delta)`` pair) and stored there.
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return fulltext

                 def _builddeltadiff(self, base, revinfo, fh):
                     """Return the uncompressed delta of the revision against ``base``."""
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.revision(base, _df=fh, raw=True)
                         delta = mdiff.textdiff(ptext, t)
                     return delta

                 def _builddeltainfo(self, revinfo, base, fh):
                     """Build the ``_deltainfo`` for storing the revision against ``base``.

                     The cached delta from ``revinfo.cachedelta`` is reused when it still
                     applies to ``base``; otherwise a new delta is computed.

                     Returns None, before compressing anything, when the delta would be
                     an intermediate snapshot whose size estimate (uncompressed length
                     divided by ``revlog.upperboundcomp``) already exceeds the snapshot
                     size constraints.
                     """
                     # can we use the cached delta?
                     revlog = self.revlog
                     chainbase = revlog.chainbase(base)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     snapshotdepth = None
                     if revlog._sparserevlog and deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     delta = None
                     if revinfo.cachedelta:
                         cachebase, cachediff = revinfo.cachedelta
                         # check if the diff still applies: walk down the empty deltas
                         # from the cached base to see whether we land on ``base``
                         currentbase = cachebase
                         while (currentbase != nullrev
                                 and currentbase != base
                                 and self.revlog.length(currentbase) == 0):
                             currentbase = self.revlog.deltaparent(currentbase)
                         if self.revlog._lazydelta and currentbase == base:
                             delta = revinfo.cachedelta[1]
                     if delta is None:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     # snapshotdepth needs to be neither None nor a 0 level snapshot
                     if revlog.upperboundcomp is not None and snapshotdepth:
                         # use an estimate of the best compression we can hope for
                         lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
                         # absolute constraint on the intermediate snapshot size
                         snapshotlimit = revinfo.textlen >> snapshotdepth
                         if snapshotlimit < lowestrealisticdeltalen:
                             return None
                         # relative constraint: not bigger than the previous snapshot
                         if revlog.length(base) < lowestrealisticdeltalen:
                             return None
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)

                 def _fullsnapshotinfo(self, fh, revinfo):
                     """Build the ``_deltainfo`` for storing the revision as a full text.

                     The entry is its own base and chain base, has a chain length of 1
                     and a snapshot depth of 0.
                     """
                     curr = len(self.revlog)
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(dist, deltalen, data, deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)

                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision

                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not

                     Candidate groups come from _candidategroups. Within each group the
                     smallest good delta is kept, and its base is sent back to the
                     generator so that it can propose refined candidates.

                     If no suitable deltabase is found, we return delta info for a full
                     snapshot. This is also the case for empty texts and for revisions
                     carrying rawtext-changing flags.
                     """
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
                     deltainfo = None
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     groups = _candidategroups(self.revlog, revinfo.textlen,
                                               p1r, p2r, cachedelta)
                     candidaterevs = next(groups)
                     while candidaterevs is not None:
                         nominateddeltas = []
                         if deltainfo is not None:
                             # if we already found a good delta,
                             # challenge it against refined candidates
                             nominateddeltas.append(deltainfo)
                         for candidaterev in candidaterevs:
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if candidatedelta is not None:
                                 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                     nominateddeltas.append(candidatedelta)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                         if deltainfo is not None:
                             # report the retained base so the generator can refine it
                             candidaterevs = groups.send(deltainfo.base)
                         else:
                             candidaterevs = next(groups)
                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo