##// END OF EJS Templates
sparse-revlog: rework the way we enforce chunk size limit...
Boris Feld -
r40678:9c3c6972 default
parent child Browse files
Show More
@@ -1,899 +1,977 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46 self._snapshot = set(snapshot)
47
47
48 def start(self, rev):
48 def start(self, rev):
49 if rev == 0:
49 if rev == 0:
50 return 0
50 return 0
51 return self._data[rev - 1]
51 return self._data[rev - 1]
52
52
53 def end(self, rev):
53 def end(self, rev):
54 return self._data[rev]
54 return self._data[rev]
55
55
56 def length(self, rev):
56 def length(self, rev):
57 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
58
58
59 def __len__(self):
59 def __len__(self):
60 return len(self._data)
60 return len(self._data)
61
61
62 def issnapshot(self, rev):
62 def issnapshot(self, rev):
63 return rev in self._snapshot
63 return rev in self._snapshot
64
64
65 def slicechunk(revlog, revs, targetsize=None):
65 def slicechunk(revlog, revs, targetsize=None):
66 """slice revs to reduce the amount of unrelated data to be read from disk.
66 """slice revs to reduce the amount of unrelated data to be read from disk.
67
67
68 ``revs`` is sliced into groups that should be read in one time.
68 ``revs`` is sliced into groups that should be read in one time.
69 Assume that revs are sorted.
69 Assume that revs are sorted.
70
70
71 The initial chunk is sliced until the overall density (payload/chunks-span
71 The initial chunk is sliced until the overall density (payload/chunks-span
72 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
72 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
73 `revlog._srmingapsize` is skipped.
73 `revlog._srmingapsize` is skipped.
74
74
75 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
75 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
76 For consistency with other slicing choice, this limit won't go lower than
76 For consistency with other slicing choice, this limit won't go lower than
77 `revlog._srmingapsize`.
77 `revlog._srmingapsize`.
78
78
79 If individual revisions chunk are larger than this limit, they will still
79 If individual revisions chunk are larger than this limit, they will still
80 be raised individually.
80 be raised individually.
81
81
82 >>> revlog = _testrevlog([
82 >>> data = [
83 ... 5, #00 (5)
83 ... 5, #00 (5)
84 ... 10, #01 (5)
84 ... 10, #01 (5)
85 ... 12, #02 (2)
85 ... 12, #02 (2)
86 ... 12, #03 (empty)
86 ... 12, #03 (empty)
87 ... 27, #04 (15)
87 ... 27, #04 (15)
88 ... 31, #05 (4)
88 ... 31, #05 (4)
89 ... 31, #06 (empty)
89 ... 31, #06 (empty)
90 ... 42, #07 (11)
90 ... 42, #07 (11)
91 ... 47, #08 (5)
91 ... 47, #08 (5)
92 ... 47, #09 (empty)
92 ... 47, #09 (empty)
93 ... 48, #10 (1)
93 ... 48, #10 (1)
94 ... 51, #11 (3)
94 ... 51, #11 (3)
95 ... 74, #12 (23)
95 ... 74, #12 (23)
96 ... 85, #13 (11)
96 ... 85, #13 (11)
97 ... 86, #14 (1)
97 ... 86, #14 (1)
98 ... 91, #15 (5)
98 ... 91, #15 (5)
99 ... ])
99 ... ]
100 >>> revlog = _testrevlog(data, snapshot=range(16))
100
101
101 >>> list(slicechunk(revlog, list(range(16))))
102 >>> list(slicechunk(revlog, list(range(16))))
102 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
103 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
103 >>> list(slicechunk(revlog, [0, 15]))
104 >>> list(slicechunk(revlog, [0, 15]))
104 [[0], [15]]
105 [[0], [15]]
105 >>> list(slicechunk(revlog, [0, 11, 15]))
106 >>> list(slicechunk(revlog, [0, 11, 15]))
106 [[0], [11], [15]]
107 [[0], [11], [15]]
107 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
108 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
108 [[0], [11, 13, 15]]
109 [[0], [11, 13, 15]]
109 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
110 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
110 [[1, 2], [5, 8, 10, 11], [14]]
111 [[1, 2], [5, 8, 10, 11], [14]]
111
112
112 Slicing with a maximum chunk size
113 Slicing with a maximum chunk size
113 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
114 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
114 [[0], [11], [13], [15]]
115 [[0], [11], [13], [15]]
115 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
116 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
116 [[0], [11], [13, 15]]
117 [[0], [11], [13, 15]]
117 """
118 """
118 if targetsize is not None:
119 if targetsize is not None:
119 targetsize = max(targetsize, revlog._srmingapsize)
120 targetsize = max(targetsize, revlog._srmingapsize)
120 # targetsize should not be specified when evaluating delta candidates:
121 # targetsize should not be specified when evaluating delta candidates:
121 # * targetsize is used to ensure we stay within specification when reading,
122 # * targetsize is used to ensure we stay within specification when reading,
122 for chunk in _slicechunktodensity(revlog, revs,
123 for chunk in _slicechunktodensity(revlog, revs,
123 revlog._srdensitythreshold,
124 revlog._srdensitythreshold,
124 revlog._srmingapsize):
125 revlog._srmingapsize):
125 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
126 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
126 yield subchunk
127 yield subchunk
127
128
128 def _slicechunktosize(revlog, revs, targetsize=None):
129 def _slicechunktosize(revlog, revs, targetsize=None):
129 """slice revs to match the target size
130 """slice revs to match the target size
130
131
131 This is intended to be used on chunk that density slicing selected by that
132 This is intended to be used on chunk that density slicing selected by that
132 are still too large compared to the read garantee of revlog. This might
133 are still too large compared to the read garantee of revlog. This might
133 happens when "minimal gap size" interrupted the slicing or when chain are
134 happens when "minimal gap size" interrupted the slicing or when chain are
134 built in a way that create large blocks next to each other.
135 built in a way that create large blocks next to each other.
135
136
136 >>> revlog = _testrevlog([
137 >>> data = [
137 ... 3, #0 (3)
138 ... 3, #0 (3)
138 ... 5, #1 (2)
139 ... 5, #1 (2)
139 ... 6, #2 (1)
140 ... 6, #2 (1)
140 ... 8, #3 (2)
141 ... 8, #3 (2)
141 ... 8, #4 (empty)
142 ... 8, #4 (empty)
142 ... 11, #5 (3)
143 ... 11, #5 (3)
143 ... 12, #6 (1)
144 ... 12, #6 (1)
144 ... 13, #7 (1)
145 ... 13, #7 (1)
145 ... 14, #8 (1)
146 ... 14, #8 (1)
146 ... ])
147 ... ]
148
149 == All snapshots cases ==
150 >>> revlog = _testrevlog(data, snapshot=range(9))
147
151
148 Cases where chunk is already small enough
152 Cases where chunk is already small enough
149 >>> list(_slicechunktosize(revlog, [0], 3))
153 >>> list(_slicechunktosize(revlog, [0], 3))
150 [[0]]
154 [[0]]
151 >>> list(_slicechunktosize(revlog, [6, 7], 3))
155 >>> list(_slicechunktosize(revlog, [6, 7], 3))
152 [[6, 7]]
156 [[6, 7]]
153 >>> list(_slicechunktosize(revlog, [0], None))
157 >>> list(_slicechunktosize(revlog, [0], None))
154 [[0]]
158 [[0]]
155 >>> list(_slicechunktosize(revlog, [6, 7], None))
159 >>> list(_slicechunktosize(revlog, [6, 7], None))
156 [[6, 7]]
160 [[6, 7]]
157
161
158 cases where we need actual slicing
162 cases where we need actual slicing
159 >>> list(_slicechunktosize(revlog, [0, 1], 3))
163 >>> list(_slicechunktosize(revlog, [0, 1], 3))
160 [[0], [1]]
164 [[0], [1]]
161 >>> list(_slicechunktosize(revlog, [1, 3], 3))
165 >>> list(_slicechunktosize(revlog, [1, 3], 3))
162 [[1], [3]]
166 [[1], [3]]
163 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
167 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
164 [[1, 2], [3]]
168 [[1, 2], [3]]
165 >>> list(_slicechunktosize(revlog, [3, 5], 3))
169 >>> list(_slicechunktosize(revlog, [3, 5], 3))
166 [[3], [5]]
170 [[3], [5]]
167 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
171 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
168 [[3], [5]]
172 [[3], [5]]
169 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
173 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
170 [[5], [6, 7, 8]]
174 [[5], [6, 7, 8]]
171 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
175 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
172 [[0], [1, 2], [3], [5], [6, 7, 8]]
176 [[0], [1, 2], [3], [5], [6, 7, 8]]
173
177
174 Case with too large individual chunk (must return valid chunk)
178 Case with too large individual chunk (must return valid chunk)
175 >>> list(_slicechunktosize(revlog, [0, 1], 2))
179 >>> list(_slicechunktosize(revlog, [0, 1], 2))
176 [[0], [1]]
180 [[0], [1]]
177 >>> list(_slicechunktosize(revlog, [1, 3], 1))
181 >>> list(_slicechunktosize(revlog, [1, 3], 1))
178 [[1], [3]]
182 [[1], [3]]
179 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
183 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
180 [[3], [5]]
184 [[3], [5]]
185
186 == No Snapshot cases ==
187 >>> revlog = _testrevlog(data)
188
189 Cases where chunk is already small enough
190 >>> list(_slicechunktosize(revlog, [0], 3))
191 [[0]]
192 >>> list(_slicechunktosize(revlog, [6, 7], 3))
193 [[6, 7]]
194 >>> list(_slicechunktosize(revlog, [0], None))
195 [[0]]
196 >>> list(_slicechunktosize(revlog, [6, 7], None))
197 [[6, 7]]
198
199 cases where we need actual slicing
200 >>> list(_slicechunktosize(revlog, [0, 1], 3))
201 [[0], [1]]
202 >>> list(_slicechunktosize(revlog, [1, 3], 3))
203 [[1], [3]]
204 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
205 [[1], [2, 3]]
206 >>> list(_slicechunktosize(revlog, [3, 5], 3))
207 [[3], [5]]
208 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
209 [[3], [4, 5]]
210 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
211 [[5], [6, 7, 8]]
212 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
213 [[0], [1, 2], [3], [5], [6, 7, 8]]
214
215 Case with too large individual chunk (must return valid chunk)
216 >>> list(_slicechunktosize(revlog, [0, 1], 2))
217 [[0], [1]]
218 >>> list(_slicechunktosize(revlog, [1, 3], 1))
219 [[1], [3]]
220 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
221 [[3], [5]]
222
223 == mixed case ==
224 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
225 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
226 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
181 """
227 """
182 assert targetsize is None or 0 <= targetsize
228 assert targetsize is None or 0 <= targetsize
183 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
229 startdata = revlog.start(revs[0])
230 enddata = revlog.end(revs[-1])
231 fullspan = enddata - startdata
232 if targetsize is None or fullspan <= targetsize:
184 yield revs
233 yield revs
185 return
234 return
186
235
187 startrevidx = 0
236 startrevidx = 0
188 startdata = revlog.start(revs[0])
189 endrevidx = 0
237 endrevidx = 0
190 iterrevs = enumerate(revs)
238 iterrevs = enumerate(revs)
191 next(iterrevs) # skip first rev.
239 next(iterrevs) # skip first rev.
240 # first step: get snapshots out of the way
192 for idx, r in iterrevs:
241 for idx, r in iterrevs:
193 span = revlog.end(r) - startdata
242 span = revlog.end(r) - startdata
194 if span <= targetsize:
243 snapshot = revlog.issnapshot(r)
244 if span <= targetsize and snapshot:
195 endrevidx = idx
245 endrevidx = idx
196 else:
246 else:
197 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
247 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
198 if chunk:
248 if chunk:
199 yield chunk
249 yield chunk
200 startrevidx = idx
250 startrevidx = idx
201 startdata = revlog.start(r)
251 startdata = revlog.start(r)
202 endrevidx = idx
252 endrevidx = idx
203 yield _trimchunk(revlog, revs, startrevidx)
253 if not snapshot:
254 break
255
256 # for the others, we use binary slicing to quickly converge toward valid
257 # chunks (otherwise, we might end up looking for start/end of many
258 # revisions). This logic is not looking for the perfect slicing point, it
259 # focuses on quickly converging toward valid chunks.
260 nbitem = len(revs)
261 while (enddata - startdata) > targetsize:
262 endrevidx = nbitem
263 if nbitem - startrevidx <= 1:
264 break # protect against individual chunk larger than limit
265 localenddata = revlog.end(revs[endrevidx - 1])
266 span = localenddata - startdata
267 while (localenddata - startdata) > targetsize:
268 if endrevidx - startrevidx <= 1:
269 break # protect against individual chunk larger than limit
270 endrevidx -= (endrevidx - startrevidx) // 2
271 localenddata = revlog.end(revs[endrevidx - 1])
272 span = localenddata - startdata
273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 if chunk:
275 yield chunk
276 startrevidx = endrevidx
277 startdata = revlog.start(revs[startrevidx])
278
279 chunk = _trimchunk(revlog, revs, startrevidx)
280 if chunk:
281 yield chunk
204
282
205 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
283 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
206 mingapsize=0):
284 mingapsize=0):
207 """slice revs to reduce the amount of unrelated data to be read from disk.
285 """slice revs to reduce the amount of unrelated data to be read from disk.
208
286
209 ``revs`` is sliced into groups that should be read in one time.
287 ``revs`` is sliced into groups that should be read in one time.
210 Assume that revs are sorted.
288 Assume that revs are sorted.
211
289
212 The initial chunk is sliced until the overall density (payload/chunks-span
290 The initial chunk is sliced until the overall density (payload/chunks-span
213 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
291 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
214 skipped.
292 skipped.
215
293
216 >>> revlog = _testrevlog([
294 >>> revlog = _testrevlog([
217 ... 5, #00 (5)
295 ... 5, #00 (5)
218 ... 10, #01 (5)
296 ... 10, #01 (5)
219 ... 12, #02 (2)
297 ... 12, #02 (2)
220 ... 12, #03 (empty)
298 ... 12, #03 (empty)
221 ... 27, #04 (15)
299 ... 27, #04 (15)
222 ... 31, #05 (4)
300 ... 31, #05 (4)
223 ... 31, #06 (empty)
301 ... 31, #06 (empty)
224 ... 42, #07 (11)
302 ... 42, #07 (11)
225 ... 47, #08 (5)
303 ... 47, #08 (5)
226 ... 47, #09 (empty)
304 ... 47, #09 (empty)
227 ... 48, #10 (1)
305 ... 48, #10 (1)
228 ... 51, #11 (3)
306 ... 51, #11 (3)
229 ... 74, #12 (23)
307 ... 74, #12 (23)
230 ... 85, #13 (11)
308 ... 85, #13 (11)
231 ... 86, #14 (1)
309 ... 86, #14 (1)
232 ... 91, #15 (5)
310 ... 91, #15 (5)
233 ... ])
311 ... ])
234
312
235 >>> list(_slicechunktodensity(revlog, list(range(16))))
313 >>> list(_slicechunktodensity(revlog, list(range(16))))
236 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
314 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
237 >>> list(_slicechunktodensity(revlog, [0, 15]))
315 >>> list(_slicechunktodensity(revlog, [0, 15]))
238 [[0], [15]]
316 [[0], [15]]
239 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
317 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
240 [[0], [11], [15]]
318 [[0], [11], [15]]
241 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
319 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
242 [[0], [11, 13, 15]]
320 [[0], [11, 13, 15]]
243 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
321 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
244 [[1, 2], [5, 8, 10, 11], [14]]
322 [[1, 2], [5, 8, 10, 11], [14]]
245 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
323 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
246 ... mingapsize=20))
324 ... mingapsize=20))
247 [[1, 2, 3, 5, 8, 10, 11], [14]]
325 [[1, 2, 3, 5, 8, 10, 11], [14]]
248 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
326 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
249 ... targetdensity=0.95))
327 ... targetdensity=0.95))
250 [[1, 2], [5], [8, 10, 11], [14]]
328 [[1, 2], [5], [8, 10, 11], [14]]
251 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
329 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
252 ... targetdensity=0.95, mingapsize=12))
330 ... targetdensity=0.95, mingapsize=12))
253 [[1, 2], [5, 8, 10, 11], [14]]
331 [[1, 2], [5, 8, 10, 11], [14]]
254 """
332 """
255 start = revlog.start
333 start = revlog.start
256 length = revlog.length
334 length = revlog.length
257
335
258 if len(revs) <= 1:
336 if len(revs) <= 1:
259 yield revs
337 yield revs
260 return
338 return
261
339
262 deltachainspan = segmentspan(revlog, revs)
340 deltachainspan = segmentspan(revlog, revs)
263
341
264 if deltachainspan < mingapsize:
342 if deltachainspan < mingapsize:
265 yield revs
343 yield revs
266 return
344 return
267
345
268 readdata = deltachainspan
346 readdata = deltachainspan
269 chainpayload = sum(length(r) for r in revs)
347 chainpayload = sum(length(r) for r in revs)
270
348
271 if deltachainspan:
349 if deltachainspan:
272 density = chainpayload / float(deltachainspan)
350 density = chainpayload / float(deltachainspan)
273 else:
351 else:
274 density = 1.0
352 density = 1.0
275
353
276 if density >= targetdensity:
354 if density >= targetdensity:
277 yield revs
355 yield revs
278 return
356 return
279
357
280 # Store the gaps in a heap to have them sorted by decreasing size
358 # Store the gaps in a heap to have them sorted by decreasing size
281 gaps = []
359 gaps = []
282 prevend = None
360 prevend = None
283 for i, rev in enumerate(revs):
361 for i, rev in enumerate(revs):
284 revstart = start(rev)
362 revstart = start(rev)
285 revlen = length(rev)
363 revlen = length(rev)
286
364
287 # Skip empty revisions to form larger holes
365 # Skip empty revisions to form larger holes
288 if revlen == 0:
366 if revlen == 0:
289 continue
367 continue
290
368
291 if prevend is not None:
369 if prevend is not None:
292 gapsize = revstart - prevend
370 gapsize = revstart - prevend
293 # only consider holes that are large enough
371 # only consider holes that are large enough
294 if gapsize > mingapsize:
372 if gapsize > mingapsize:
295 gaps.append((gapsize, i))
373 gaps.append((gapsize, i))
296
374
297 prevend = revstart + revlen
375 prevend = revstart + revlen
298 # sort the gaps to pop them from largest to small
376 # sort the gaps to pop them from largest to small
299 gaps.sort()
377 gaps.sort()
300
378
301 # Collect the indices of the largest holes until the density is acceptable
379 # Collect the indices of the largest holes until the density is acceptable
302 selected = []
380 selected = []
303 while gaps and density < targetdensity:
381 while gaps and density < targetdensity:
304 gapsize, gapidx = gaps.pop()
382 gapsize, gapidx = gaps.pop()
305
383
306 selected.append(gapidx)
384 selected.append(gapidx)
307
385
308 # the gap sizes are stored as negatives to be sorted decreasingly
386 # the gap sizes are stored as negatives to be sorted decreasingly
309 # by the heap
387 # by the heap
310 readdata -= gapsize
388 readdata -= gapsize
311 if readdata > 0:
389 if readdata > 0:
312 density = chainpayload / float(readdata)
390 density = chainpayload / float(readdata)
313 else:
391 else:
314 density = 1.0
392 density = 1.0
315 selected.sort()
393 selected.sort()
316
394
317 # Cut the revs at collected indices
395 # Cut the revs at collected indices
318 previdx = 0
396 previdx = 0
319 for idx in selected:
397 for idx in selected:
320
398
321 chunk = _trimchunk(revlog, revs, previdx, idx)
399 chunk = _trimchunk(revlog, revs, previdx, idx)
322 if chunk:
400 if chunk:
323 yield chunk
401 yield chunk
324
402
325 previdx = idx
403 previdx = idx
326
404
327 chunk = _trimchunk(revlog, revs, previdx)
405 chunk = _trimchunk(revlog, revs, previdx)
328 if chunk:
406 if chunk:
329 yield chunk
407 yield chunk
330
408
331 def _trimchunk(revlog, revs, startidx, endidx=None):
409 def _trimchunk(revlog, revs, startidx, endidx=None):
332 """returns revs[startidx:endidx] without empty trailing revs
410 """returns revs[startidx:endidx] without empty trailing revs
333
411
334 Doctest Setup
412 Doctest Setup
335 >>> revlog = _testrevlog([
413 >>> revlog = _testrevlog([
336 ... 5, #0
414 ... 5, #0
337 ... 10, #1
415 ... 10, #1
338 ... 12, #2
416 ... 12, #2
339 ... 12, #3 (empty)
417 ... 12, #3 (empty)
340 ... 17, #4
418 ... 17, #4
341 ... 21, #5
419 ... 21, #5
342 ... 21, #6 (empty)
420 ... 21, #6 (empty)
343 ... ])
421 ... ])
344
422
345 Contiguous cases:
423 Contiguous cases:
346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
424 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
347 [0, 1, 2, 3, 4, 5]
425 [0, 1, 2, 3, 4, 5]
348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
426 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
349 [0, 1, 2, 3, 4]
427 [0, 1, 2, 3, 4]
350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
351 [0, 1, 2]
429 [0, 1, 2]
352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
353 [2]
431 [2]
354 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
355 [3, 4, 5]
433 [3, 4, 5]
356 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
357 [3, 4]
435 [3, 4]
358
436
359 Discontiguous cases:
437 Discontiguous cases:
360 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
438 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
361 [1, 3, 5]
439 [1, 3, 5]
362 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
440 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
363 [1]
441 [1]
364 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
365 [3, 5]
443 [3, 5]
366 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
367 [3, 5]
445 [3, 5]
368 """
446 """
369 length = revlog.length
447 length = revlog.length
370
448
371 if endidx is None:
449 if endidx is None:
372 endidx = len(revs)
450 endidx = len(revs)
373
451
374 # If we have a non-emtpy delta candidate, there are nothing to trim
452 # If we have a non-emtpy delta candidate, there are nothing to trim
375 if revs[endidx - 1] < len(revlog):
453 if revs[endidx - 1] < len(revlog):
376 # Trim empty revs at the end, except the very first revision of a chain
454 # Trim empty revs at the end, except the very first revision of a chain
377 while (endidx > 1
455 while (endidx > 1
378 and endidx > startidx
456 and endidx > startidx
379 and length(revs[endidx - 1]) == 0):
457 and length(revs[endidx - 1]) == 0):
380 endidx -= 1
458 endidx -= 1
381
459
382 return revs[startidx:endidx]
460 return revs[startidx:endidx]
383
461
384 def segmentspan(revlog, revs):
462 def segmentspan(revlog, revs):
385 """Get the byte span of a segment of revisions
463 """Get the byte span of a segment of revisions
386
464
387 revs is a sorted array of revision numbers
465 revs is a sorted array of revision numbers
388
466
389 >>> revlog = _testrevlog([
467 >>> revlog = _testrevlog([
390 ... 5, #0
468 ... 5, #0
391 ... 10, #1
469 ... 10, #1
392 ... 12, #2
470 ... 12, #2
393 ... 12, #3 (empty)
471 ... 12, #3 (empty)
394 ... 17, #4
472 ... 17, #4
395 ... ])
473 ... ])
396
474
397 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
475 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
398 17
476 17
399 >>> segmentspan(revlog, [0, 4])
477 >>> segmentspan(revlog, [0, 4])
400 17
478 17
401 >>> segmentspan(revlog, [3, 4])
479 >>> segmentspan(revlog, [3, 4])
402 5
480 5
403 >>> segmentspan(revlog, [1, 2, 3,])
481 >>> segmentspan(revlog, [1, 2, 3,])
404 7
482 7
405 >>> segmentspan(revlog, [1, 3])
483 >>> segmentspan(revlog, [1, 3])
406 7
484 7
407 """
485 """
408 if not revs:
486 if not revs:
409 return 0
487 return 0
410 end = revlog.end(revs[-1])
488 end = revlog.end(revs[-1])
411 return end - revlog.start(revs[0])
489 return end - revlog.start(revs[0])
412
490
413 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
491 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
414 """build full text from a (base, delta) pair and other metadata"""
492 """build full text from a (base, delta) pair and other metadata"""
415 # special case deltas which replace entire base; no need to decode
493 # special case deltas which replace entire base; no need to decode
416 # base revision. this neatly avoids censored bases, which throw when
494 # base revision. this neatly avoids censored bases, which throw when
417 # they're decoded.
495 # they're decoded.
418 hlen = struct.calcsize(">lll")
496 hlen = struct.calcsize(">lll")
419 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
497 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
420 len(delta) - hlen):
498 len(delta) - hlen):
421 fulltext = delta[hlen:]
499 fulltext = delta[hlen:]
422 else:
500 else:
423 # deltabase is rawtext before changed by flag processors, which is
501 # deltabase is rawtext before changed by flag processors, which is
424 # equivalent to non-raw text
502 # equivalent to non-raw text
425 basetext = revlog.revision(baserev, _df=fh, raw=False)
503 basetext = revlog.revision(baserev, _df=fh, raw=False)
426 fulltext = mdiff.patch(basetext, delta)
504 fulltext = mdiff.patch(basetext, delta)
427
505
428 try:
506 try:
429 res = revlog._processflags(fulltext, flags, 'read', raw=True)
507 res = revlog._processflags(fulltext, flags, 'read', raw=True)
430 fulltext, validatehash = res
508 fulltext, validatehash = res
431 if validatehash:
509 if validatehash:
432 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
510 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
433 if flags & REVIDX_ISCENSORED:
511 if flags & REVIDX_ISCENSORED:
434 raise error.StorageError(_('node %s is not censored') %
512 raise error.StorageError(_('node %s is not censored') %
435 expectednode)
513 expectednode)
436 except error.CensoredNodeError:
514 except error.CensoredNodeError:
437 # must pass the censored index flag to add censored revisions
515 # must pass the censored index flag to add censored revisions
438 if not flags & REVIDX_ISCENSORED:
516 if not flags & REVIDX_ISCENSORED:
439 raise
517 raise
440 return fulltext
518 return fulltext
441
519
442 @attr.s(slots=True, frozen=True)
520 @attr.s(slots=True, frozen=True)
443 class _deltainfo(object):
521 class _deltainfo(object):
444 distance = attr.ib()
522 distance = attr.ib()
445 deltalen = attr.ib()
523 deltalen = attr.ib()
446 data = attr.ib()
524 data = attr.ib()
447 base = attr.ib()
525 base = attr.ib()
448 chainbase = attr.ib()
526 chainbase = attr.ib()
449 chainlen = attr.ib()
527 chainlen = attr.ib()
450 compresseddeltalen = attr.ib()
528 compresseddeltalen = attr.ib()
451 snapshotdepth = attr.ib()
529 snapshotdepth = attr.ib()
452
530
453 def isgooddeltainfo(revlog, deltainfo, revinfo):
531 def isgooddeltainfo(revlog, deltainfo, revinfo):
454 """Returns True if the given delta is good. Good means that it is within
532 """Returns True if the given delta is good. Good means that it is within
455 the disk span, disk size, and chain length bounds that we know to be
533 the disk span, disk size, and chain length bounds that we know to be
456 performant."""
534 performant."""
457 if deltainfo is None:
535 if deltainfo is None:
458 return False
536 return False
459
537
460 # - 'deltainfo.distance' is the distance from the base revision --
538 # - 'deltainfo.distance' is the distance from the base revision --
461 # bounding it limits the amount of I/O we need to do.
539 # bounding it limits the amount of I/O we need to do.
462 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
540 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
463 # deltas we need to apply -- bounding it limits the amount of CPU
541 # deltas we need to apply -- bounding it limits the amount of CPU
464 # we consume.
542 # we consume.
465
543
466 textlen = revinfo.textlen
544 textlen = revinfo.textlen
467 defaultmax = textlen * 4
545 defaultmax = textlen * 4
468 maxdist = revlog._maxdeltachainspan
546 maxdist = revlog._maxdeltachainspan
469 if not maxdist:
547 if not maxdist:
470 maxdist = deltainfo.distance # ensure the conditional pass
548 maxdist = deltainfo.distance # ensure the conditional pass
471 maxdist = max(maxdist, defaultmax)
549 maxdist = max(maxdist, defaultmax)
472
550
473 # Bad delta from read span:
551 # Bad delta from read span:
474 #
552 #
475 # If the span of data read is larger than the maximum allowed.
553 # If the span of data read is larger than the maximum allowed.
476 #
554 #
477 # In the sparse-revlog case, we rely on the associated "sparse reading"
555 # In the sparse-revlog case, we rely on the associated "sparse reading"
478 # to avoid issue related to the span of data. In theory, it would be
556 # to avoid issue related to the span of data. In theory, it would be
479 # possible to build pathological revlog where delta pattern would lead
557 # possible to build pathological revlog where delta pattern would lead
480 # to too many reads. However, they do not happen in practice at all. So
558 # to too many reads. However, they do not happen in practice at all. So
481 # we skip the span check entirely.
559 # we skip the span check entirely.
482 if not revlog._sparserevlog and maxdist < deltainfo.distance:
560 if not revlog._sparserevlog and maxdist < deltainfo.distance:
483 return False
561 return False
484
562
485 # Bad delta from new delta size:
563 # Bad delta from new delta size:
486 #
564 #
487 # If the delta size is larger than the target text, storing the
565 # If the delta size is larger than the target text, storing the
488 # delta will be inefficient.
566 # delta will be inefficient.
489 if textlen < deltainfo.deltalen:
567 if textlen < deltainfo.deltalen:
490 return False
568 return False
491
569
492 # Bad delta from cumulated payload size:
570 # Bad delta from cumulated payload size:
493 #
571 #
494 # If the sum of delta get larger than K * target text length.
572 # If the sum of delta get larger than K * target text length.
495 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
573 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
496 return False
574 return False
497
575
498 # Bad delta from chain length:
576 # Bad delta from chain length:
499 #
577 #
500 # If the number of delta in the chain gets too high.
578 # If the number of delta in the chain gets too high.
501 if (revlog._maxchainlen
579 if (revlog._maxchainlen
502 and revlog._maxchainlen < deltainfo.chainlen):
580 and revlog._maxchainlen < deltainfo.chainlen):
503 return False
581 return False
504
582
505 # bad delta from intermediate snapshot size limit
583 # bad delta from intermediate snapshot size limit
506 #
584 #
507 # If an intermediate snapshot size is higher than the limit. The
585 # If an intermediate snapshot size is higher than the limit. The
508 # limit exist to prevent endless chain of intermediate delta to be
586 # limit exist to prevent endless chain of intermediate delta to be
509 # created.
587 # created.
510 if (deltainfo.snapshotdepth is not None and
588 if (deltainfo.snapshotdepth is not None and
511 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
589 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
512 return False
590 return False
513
591
514 # bad delta if new intermediate snapshot is larger than the previous
592 # bad delta if new intermediate snapshot is larger than the previous
515 # snapshot
593 # snapshot
516 if (deltainfo.snapshotdepth
594 if (deltainfo.snapshotdepth
517 and revlog.length(deltainfo.base) < deltainfo.deltalen):
595 and revlog.length(deltainfo.base) < deltainfo.deltalen):
518 return False
596 return False
519
597
520 return True
598 return True
521
599
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine-style generator: after each yielded group the caller
    uses ``send(good)`` to report which revision (if any) produced a usable
    delta, and that feedback drives the refinement done by ``_refinedgroups``.
    A final ``None`` is yielded when no more candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        # empty revlog, or delta chains disabled: nothing to test against
        yield None
        return

    # hoist bound methods out of the loop (hot path)
    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    # any base whose own delta is already larger than this can never yield
    # an acceptable chain (see LIMIT_DELTA2TEXT)
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # revisions already tried (or known useless); nullrev is handled last
    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
575
653
576 def _findsnapshots(revlog, cache, start_rev):
654 def _findsnapshots(revlog, cache, start_rev):
577 """find snapshot from start_rev to tip"""
655 """find snapshot from start_rev to tip"""
578 deltaparent = revlog.deltaparent
656 deltaparent = revlog.deltaparent
579 issnapshot = revlog.issnapshot
657 issnapshot = revlog.issnapshot
580 for rev in revlog.revs(start_rev):
658 for rev in revlog.revs(start_rev):
581 if issnapshot(rev):
659 if issnapshot(rev):
582 cache[deltaparent(rev)].append(rev)
660 cache[deltaparent(rev)].append(rev)
583
661
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate-base groups and refine them from caller feedback.

    Coroutine used by ``_candidategroups``: each ``yield`` hands out a tuple
    of candidate base revisions; the caller ``send()``s back the revision
    that produced a good delta (or None). When sparse-revlog is enabled and
    the chosen base is a snapshot, we keep refining it, first walking down
    its snapshot chain, then back up through snapshot children.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: try each ancestor snapshot in turn until the
        # caller stops accepting a new base (send() keeps returning the same
        # revision) or we reach a full snapshot (delta parent is nullrev)
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer snapshot children of the accepted base
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
634
712
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    # convenience accessor: full delta chain (list of revs) for a revision
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `if gdelta:` branch
    # above; this presumably relies on sparse-revlog implying general delta —
    # confirm that invariant holds for all callers.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # only snapshots newer than the parents' full snapshots are relevant
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odds we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
751
829
class deltacomputer(object):
    """Compute delta information (base, payload, chain stats) for a revlog.

    Thin stateful wrapper around a revlog instance; the main entry point is
    ``finddeltainfo`` which searches candidate bases via ``_candidategroups``
    and falls back to a full snapshot when no delta is acceptable.
    """
    def __init__(self, revlog):
        # the revlog this computer produces deltas for
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        The result is memoized in ``revinfo.btext[0]``.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            # fulltext already built (or provided) earlier
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # apply the cached delta on top of its base to reconstruct (and
        # validate) the fulltext; memoize through btext[0]
        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw (uncompressed) delta of revinfo's text against
        revision ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` describing a delta of revinfo against
        ``base``: compressed payload, delta base, chain length/size and
        snapshot depth (when applicable)."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies: walk over empty (zero-length)
            # deltas between the cached base and the requested base
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        # distance from the start of the chain to the end of this new delta
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a ``_deltainfo`` storing revinfo as a full snapshot (no
        delta base): chain of length 1 based on the new revision itself."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            # empty revision: no delta can beat a (trivial) full snapshot
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest acceptable delta found so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the generator can refine
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now