##// END OF EJS Templates
delta-find: expand a function definition and call before extending it...
marmoute -
r50507:2afee217 default
parent child Browse files
Show More
@@ -1,1386 +1,1396
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 REVIDX_ISCENSORED,
26 REVIDX_ISCENSORED,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 )
28 )
29
29
30 from ..thirdparty import attr
30 from ..thirdparty import attr
31
31
32 from .. import (
32 from .. import (
33 error,
33 error,
34 mdiff,
34 mdiff,
35 util,
35 util,
36 )
36 )
37
37
38 from . import flagutil
38 from . import flagutil
39
39
40 # maximum <delta-chain-data>/<revision-text-length> ratio
40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 LIMIT_DELTA2TEXT = 2
41 LIMIT_DELTA2TEXT = 2
42
42
43
43
44 class _testrevlog:
44 class _testrevlog:
45 """minimalist fake revlog to use in doctests"""
45 """minimalist fake revlog to use in doctests"""
46
46
47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 """data is an list of revision payload boundaries"""
48 """data is an list of revision payload boundaries"""
49 self._data = data
49 self._data = data
50 self._srdensitythreshold = density
50 self._srdensitythreshold = density
51 self._srmingapsize = mingap
51 self._srmingapsize = mingap
52 self._snapshot = set(snapshot)
52 self._snapshot = set(snapshot)
53 self.index = None
53 self.index = None
54
54
55 def start(self, rev):
55 def start(self, rev):
56 if rev == nullrev:
56 if rev == nullrev:
57 return 0
57 return 0
58 if rev == 0:
58 if rev == 0:
59 return 0
59 return 0
60 return self._data[rev - 1]
60 return self._data[rev - 1]
61
61
62 def end(self, rev):
62 def end(self, rev):
63 if rev == nullrev:
63 if rev == nullrev:
64 return 0
64 return 0
65 return self._data[rev]
65 return self._data[rev]
66
66
67 def length(self, rev):
67 def length(self, rev):
68 return self.end(rev) - self.start(rev)
68 return self.end(rev) - self.start(rev)
69
69
70 def __len__(self):
70 def __len__(self):
71 return len(self._data)
71 return len(self._data)
72
72
73 def issnapshot(self, rev):
73 def issnapshot(self, rev):
74 if rev == nullrev:
74 if rev == nullrev:
75 return True
75 return True
76 return rev in self._snapshot
76 return rev in self._snapshot
77
77
78
78
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size configured on the revlog
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if density_slicer is None:
        # the index has no native implementation; use the Python fallback
        def density_slicer(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    threshold = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    for chunk in density_slicer(revs, threshold, mingap):
        # a density-acceptable chunk may still be too large to read at once
        yield from _slicechunktosize(revlog, chunk, targetsize)
151
151
152
152
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole chunk already fits within the target size: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    # Snapshots at the head of the chunk are grouped greedily as long as the
    # accumulated span stays within `targetsize`; the first non-snapshot
    # revision ends this pass and hands over to the binary slicing below.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget: extend the current chunk
            endrevidx = idx + 1
        else:
            # budget exceeded (or non-snapshot reached): flush current chunk
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until its on-disk span fits the target
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the loop above
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
306
306
307
307
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced any further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole span is smaller than the minimal gap we would skip anyway
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # already dense enough: no slicing needed
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions; the list is
    # sorted afterwards so the largest gaps can be popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read, which
        # raises the effective density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
432
432
433
433
434 def _trimchunk(revlog, revs, startidx, endidx=None):
434 def _trimchunk(revlog, revs, startidx, endidx=None):
435 """returns revs[startidx:endidx] without empty trailing revs
435 """returns revs[startidx:endidx] without empty trailing revs
436
436
437 Doctest Setup
437 Doctest Setup
438 >>> revlog = _testrevlog([
438 >>> revlog = _testrevlog([
439 ... 5, #0
439 ... 5, #0
440 ... 10, #1
440 ... 10, #1
441 ... 12, #2
441 ... 12, #2
442 ... 12, #3 (empty)
442 ... 12, #3 (empty)
443 ... 17, #4
443 ... 17, #4
444 ... 21, #5
444 ... 21, #5
445 ... 21, #6 (empty)
445 ... 21, #6 (empty)
446 ... ])
446 ... ])
447
447
448 Contiguous cases:
448 Contiguous cases:
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 [0, 1, 2, 3, 4, 5]
450 [0, 1, 2, 3, 4, 5]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 [0, 1, 2, 3, 4]
452 [0, 1, 2, 3, 4]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 [0, 1, 2]
454 [0, 1, 2]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 [2]
456 [2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 [3, 4, 5]
458 [3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 [3, 4]
460 [3, 4]
461
461
462 Discontiguous cases:
462 Discontiguous cases:
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 [1, 3, 5]
464 [1, 3, 5]
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 [1]
466 [1]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 [3, 5]
468 [3, 5]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 [3, 5]
470 [3, 5]
471 """
471 """
472 length = revlog.length
472 length = revlog.length
473
473
474 if endidx is None:
474 if endidx is None:
475 endidx = len(revs)
475 endidx = len(revs)
476
476
477 # If we have a non-emtpy delta candidate, there are nothing to trim
477 # If we have a non-emtpy delta candidate, there are nothing to trim
478 if revs[endidx - 1] < len(revlog):
478 if revs[endidx - 1] < len(revlog):
479 # Trim empty revs at the end, except the very first revision of a chain
479 # Trim empty revs at the end, except the very first revision of a chain
480 while (
480 while (
481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 ):
482 ):
483 endidx -= 1
483 endidx -= 1
484
484
485 return revs[startidx:endidx]
485 return revs[startidx:endidx]
486
486
487
487
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # from the start of the first revision to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
516
516
517
517
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    fh:           file handle handed down to ``revlog.revision`` when the
                  base text has to be read
    baserev:      revision number the delta applies against
    delta:        binary delta, possibly a full-replacement delta
    p1, p2:       parent nodes, used for hash validation
    flags:        revision index flags (e.g. REVIDX_ISCENSORED)
    expectednode: node id the reconstructed text is expected to hash to

    Returns the reconstructed full text. A CensoredNodeError raised during
    validation is swallowed only when the censored index flag is set;
    conversely a StorageError is raised when the flag is set but validation
    did not detect censorship.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full replacement: the text is simply everything after the header
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # the flag claims censorship but validation went through cleanly
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
547
547
548
548
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta and its chain."""

    # distance from the base revision -- bounds the amount of I/O needed
    # to read the chain (see isgooddeltainfo)
    distance = attr.ib()
    # length of the (possibly compressed) delta payload, header included
    deltalen = attr.ib()
    # (header, payload) pair; header is b'u' for uncompressed data
    # (see drop_u_compression) or b'' when no header is needed
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of revisions in the delta chain
    chainlen = attr.ib()
    # sum of the sizes of all deltas that must be applied to rebuild the
    # text -- bounds the amount of CPU consumed (see isgooddeltainfo)
    compresseddeltalen = attr.ib()
    # depth of the snapshot when this delta is an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
559
559
560
560
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    assert header == b'u', header
    # dropping the one-byte b'u' marker shortens the stored delta by one;
    # everything else about the delta and its chain is unchanged
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
577
577
578
578
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # no candidate at all can never be good
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    fulltextlen = revinfo.textlen
    defaultmax = fulltextlen * 4
    configured_span = revlog._maxdeltachainspan
    if configured_span:
        maxdist = max(configured_span, defaultmax)
    else:
        # no configured limit: make sure the distance check always passes
        maxdist = max(deltainfo.distance, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if deltainfo.distance > maxdist and not revlog._sparserevlog:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if deltainfo.deltalen > fulltextlen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if deltainfo.compresseddeltalen > fulltextlen * LIMIT_DELTA2TEXT:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    maxchainlen = revlog._maxchainlen
    if maxchainlen and deltainfo.chainlen > maxchainlen:
        return False

    depth = deltainfo.snapshotdepth
    if depth is not None:
        # bad delta from intermediate snapshot size limit
        #
        # If an intermediate snapshot size is higher than the limit. The
        # limit exist to prevent endless chain of intermediate delta to be
        # created.
        if deltainfo.deltalen > (fulltextlen >> depth):
            return False
    if depth:
        # bad delta if new intermediate snapshot is larger than the previous
        # snapshot
        if deltainfo.deltalen > revlog.length(deltainfo.base):
            return False

    return True
650
650
651
651
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
656
656
657
657
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _rawgroups for details about the group order.

    This is a generator-coroutine: each yielded value is a tuple of candidate
    base revisions (a final None signals exhaustion). The caller answers
    through ``send()`` with the revision it retained from the previous group
    (or None), which is forwarded to the refining logic.

    revlog:     the revlog the new revision will be added to
    textlen:    length of the incoming fulltext
    p1, p2:     parent revisions of the incoming revision
    cachedelta: optional (baserev, delta) pair received from a bundle or a
                source revlog
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
755
761
756
762
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip

    ``cache`` maps a delta-parent revision to the list of snapshot revisions
    based on it; it is updated in place.
    """
    index = revlog.index
    if util.safehasattr(index, b'findsnapshots'):
        # fast path: the index implementation does the scan natively
        index.findsnapshots(cache, start_rev)
        return
    # slow path: walk the revisions one by one in Python
    is_snapshot = revlog.issnapshot
    parent_of = revlog.deltaparent
    for rev in revlog.revs(start_rev):
        if is_snapshot(rev):
            cache[parent_of(rev)].append(rev)
767
773
768
774
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Generator-coroutine yielding groups of candidate delta bases.

    First tries the delta base from ``cachedelta`` (when delta reuse is
    enabled), then the groups produced by ``_rawgroups``. On sparse revlogs,
    once the caller reports a good snapshot base (via ``send()``), that
    choice is refined by walking the snapshot chain down (towards its delta
    parents) and then up (towards the snapshots based on it).

    A final None is always yielded to signal exhaustion.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshots = collections.defaultdict(list)
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshots,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
832
838
833
839
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting deltas that are theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.

    ``snapshots`` is an optional, caller-provided mapping of delta-parent
    revision to the snapshots based on it; it is filled in as a side effect
    (via _findsnapshots) so the caller can reuse it for refinement.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: ``parents`` is only bound when gdelta is true, but ``sparse``
    # implies gdelta so the short-circuit below is safe.
    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
953
959
954
960
955 class deltacomputer:
961 class deltacomputer:
956 def __init__(
962 def __init__(
957 self,
963 self,
958 revlog,
964 revlog,
959 write_debug=None,
965 write_debug=None,
960 debug_search=False,
966 debug_search=False,
961 debug_info=None,
967 debug_info=None,
962 ):
968 ):
963 self.revlog = revlog
969 self.revlog = revlog
964 self._write_debug = write_debug
970 self._write_debug = write_debug
965 self._debug_search = debug_search
971 self._debug_search = debug_search
966 self._debug_info = debug_info
972 self._debug_info = debug_info
967
973
968 def buildtext(self, revinfo, fh):
974 def buildtext(self, revinfo, fh):
969 """Builds a fulltext version of a revision
975 """Builds a fulltext version of a revision
970
976
971 revinfo: revisioninfo instance that contains all needed info
977 revinfo: revisioninfo instance that contains all needed info
972 fh: file handle to either the .i or the .d revlog file,
978 fh: file handle to either the .i or the .d revlog file,
973 depending on whether it is inlined or not
979 depending on whether it is inlined or not
974 """
980 """
975 btext = revinfo.btext
981 btext = revinfo.btext
976 if btext[0] is not None:
982 if btext[0] is not None:
977 return btext[0]
983 return btext[0]
978
984
979 revlog = self.revlog
985 revlog = self.revlog
980 cachedelta = revinfo.cachedelta
986 cachedelta = revinfo.cachedelta
981 baserev = cachedelta[0]
987 baserev = cachedelta[0]
982 delta = cachedelta[1]
988 delta = cachedelta[1]
983
989
984 fulltext = btext[0] = _textfromdelta(
990 fulltext = btext[0] = _textfromdelta(
985 fh,
991 fh,
986 revlog,
992 revlog,
987 baserev,
993 baserev,
988 delta,
994 delta,
989 revinfo.p1,
995 revinfo.p1,
990 revinfo.p2,
996 revinfo.p2,
991 revinfo.flags,
997 revinfo.flags,
992 revinfo.node,
998 revinfo.node,
993 )
999 )
994 return fulltext
1000 return fulltext
995
1001
996 def _builddeltadiff(self, base, revinfo, fh):
1002 def _builddeltadiff(self, base, revinfo, fh):
997 revlog = self.revlog
1003 revlog = self.revlog
998 t = self.buildtext(revinfo, fh)
1004 t = self.buildtext(revinfo, fh)
999 if revlog.iscensored(base):
1005 if revlog.iscensored(base):
1000 # deltas based on a censored revision must replace the
1006 # deltas based on a censored revision must replace the
1001 # full content in one patch, so delta works everywhere
1007 # full content in one patch, so delta works everywhere
1002 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1008 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1003 delta = header + t
1009 delta = header + t
1004 else:
1010 else:
1005 ptext = revlog.rawdata(base, _df=fh)
1011 ptext = revlog.rawdata(base, _df=fh)
1006 delta = mdiff.textdiff(ptext, t)
1012 delta = mdiff.textdiff(ptext, t)
1007
1013
1008 return delta
1014 return delta
1009
1015
1010 def _builddeltainfo(self, revinfo, base, fh):
1016 def _builddeltainfo(self, revinfo, base, fh):
1011 # can we use the cached delta?
1017 # can we use the cached delta?
1012 revlog = self.revlog
1018 revlog = self.revlog
1013 debug_search = self._write_debug is not None and self._debug_search
1019 debug_search = self._write_debug is not None and self._debug_search
1014 chainbase = revlog.chainbase(base)
1020 chainbase = revlog.chainbase(base)
1015 if revlog._generaldelta:
1021 if revlog._generaldelta:
1016 deltabase = base
1022 deltabase = base
1017 else:
1023 else:
1018 deltabase = chainbase
1024 deltabase = chainbase
1019 snapshotdepth = None
1025 snapshotdepth = None
1020 if revlog._sparserevlog and deltabase == nullrev:
1026 if revlog._sparserevlog and deltabase == nullrev:
1021 snapshotdepth = 0
1027 snapshotdepth = 0
1022 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1028 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1023 # A delta chain should always be one full snapshot,
1029 # A delta chain should always be one full snapshot,
1024 # zero or more semi-snapshots, and zero or more deltas
1030 # zero or more semi-snapshots, and zero or more deltas
1025 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1031 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1026 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1032 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1027 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1033 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1028 delta = None
1034 delta = None
1029 if revinfo.cachedelta:
1035 if revinfo.cachedelta:
1030 cachebase, cachediff = revinfo.cachedelta
1036 cachebase, cachediff = revinfo.cachedelta
1031 # check if the diff still apply
1037 # check if the diff still apply
1032 currentbase = cachebase
1038 currentbase = cachebase
1033 while (
1039 while (
1034 currentbase != nullrev
1040 currentbase != nullrev
1035 and currentbase != base
1041 and currentbase != base
1036 and self.revlog.length(currentbase) == 0
1042 and self.revlog.length(currentbase) == 0
1037 ):
1043 ):
1038 currentbase = self.revlog.deltaparent(currentbase)
1044 currentbase = self.revlog.deltaparent(currentbase)
1039 if self.revlog._lazydelta and currentbase == base:
1045 if self.revlog._lazydelta and currentbase == base:
1040 delta = revinfo.cachedelta[1]
1046 delta = revinfo.cachedelta[1]
1041 if delta is None:
1047 if delta is None:
1042 delta = self._builddeltadiff(base, revinfo, fh)
1048 delta = self._builddeltadiff(base, revinfo, fh)
1043 if debug_search:
1049 if debug_search:
1044 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1050 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1045 msg %= len(delta)
1051 msg %= len(delta)
1046 self._write_debug(msg)
1052 self._write_debug(msg)
1047 # snapshotdept need to be neither None nor 0 level snapshot
1053 # snapshotdept need to be neither None nor 0 level snapshot
1048 if revlog.upperboundcomp is not None and snapshotdepth:
1054 if revlog.upperboundcomp is not None and snapshotdepth:
1049 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1055 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1050 snapshotlimit = revinfo.textlen >> snapshotdepth
1056 snapshotlimit = revinfo.textlen >> snapshotdepth
1051 if debug_search:
1057 if debug_search:
1052 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1058 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1053 msg %= lowestrealisticdeltalen
1059 msg %= lowestrealisticdeltalen
1054 self._write_debug(msg)
1060 self._write_debug(msg)
1055 if snapshotlimit < lowestrealisticdeltalen:
1061 if snapshotlimit < lowestrealisticdeltalen:
1056 if debug_search:
1062 if debug_search:
1057 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1063 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1058 self._write_debug(msg)
1064 self._write_debug(msg)
1059 return None
1065 return None
1060 if revlog.length(base) < lowestrealisticdeltalen:
1066 if revlog.length(base) < lowestrealisticdeltalen:
1061 if debug_search:
1067 if debug_search:
1062 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1068 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1063 self._write_debug(msg)
1069 self._write_debug(msg)
1064 return None
1070 return None
1065 header, data = revlog.compress(delta)
1071 header, data = revlog.compress(delta)
1066 deltalen = len(header) + len(data)
1072 deltalen = len(header) + len(data)
1067 offset = revlog.end(len(revlog) - 1)
1073 offset = revlog.end(len(revlog) - 1)
1068 dist = deltalen + offset - revlog.start(chainbase)
1074 dist = deltalen + offset - revlog.start(chainbase)
1069 chainlen, compresseddeltalen = revlog._chaininfo(base)
1075 chainlen, compresseddeltalen = revlog._chaininfo(base)
1070 chainlen += 1
1076 chainlen += 1
1071 compresseddeltalen += deltalen
1077 compresseddeltalen += deltalen
1072
1078
1073 return _deltainfo(
1079 return _deltainfo(
1074 dist,
1080 dist,
1075 deltalen,
1081 deltalen,
1076 (header, data),
1082 (header, data),
1077 deltabase,
1083 deltabase,
1078 chainbase,
1084 chainbase,
1079 chainlen,
1085 chainlen,
1080 compresseddeltalen,
1086 compresseddeltalen,
1081 snapshotdepth,
1087 snapshotdepth,
1082 )
1088 )
1083
1089
1084 def _fullsnapshotinfo(self, fh, revinfo, curr):
1090 def _fullsnapshotinfo(self, fh, revinfo, curr):
1085 rawtext = self.buildtext(revinfo, fh)
1091 rawtext = self.buildtext(revinfo, fh)
1086 data = self.revlog.compress(rawtext)
1092 data = self.revlog.compress(rawtext)
1087 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1093 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1088 deltabase = chainbase = curr
1094 deltabase = chainbase = curr
1089 snapshotdepth = 0
1095 snapshotdepth = 0
1090 chainlen = 1
1096 chainlen = 1
1091
1097
1092 return _deltainfo(
1098 return _deltainfo(
1093 dist,
1099 dist,
1094 deltalen,
1100 deltalen,
1095 data,
1101 data,
1096 deltabase,
1102 deltabase,
1097 chainbase,
1103 chainbase,
1098 chainlen,
1104 chainlen,
1099 compresseddeltalen,
1105 compresseddeltalen,
1100 snapshotdepth,
1106 snapshotdepth,
1101 )
1107 )
1102
1108
1103 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1109 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1104 """Find an acceptable delta against a candidate revision
1110 """Find an acceptable delta against a candidate revision
1105
1111
1106 revinfo: information about the revision (instance of _revisioninfo)
1112 revinfo: information about the revision (instance of _revisioninfo)
1107 fh: file handle to either the .i or the .d revlog file,
1113 fh: file handle to either the .i or the .d revlog file,
1108 depending on whether it is inlined or not
1114 depending on whether it is inlined or not
1109
1115
1110 Returns the first acceptable candidate revision, as ordered by
1116 Returns the first acceptable candidate revision, as ordered by
1111 _candidategroups
1117 _candidategroups
1112
1118
1113 If no suitable deltabase is found, we return delta info for a full
1119 If no suitable deltabase is found, we return delta info for a full
1114 snapshot.
1120 snapshot.
1115
1121
1116 `excluded_bases` is an optional set of revision that cannot be used as
1122 `excluded_bases` is an optional set of revision that cannot be used as
1117 a delta base. Use this to recompute delta suitable in censor or strip
1123 a delta base. Use this to recompute delta suitable in censor or strip
1118 context.
1124 context.
1119 """
1125 """
1120 if target_rev is None:
1126 if target_rev is None:
1121 target_rev = len(self.revlog)
1127 target_rev = len(self.revlog)
1122
1128
1123 if not revinfo.textlen:
1129 if not revinfo.textlen:
1124 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1130 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1125
1131
1126 if excluded_bases is None:
1132 if excluded_bases is None:
1127 excluded_bases = set()
1133 excluded_bases = set()
1128
1134
1129 # no delta for flag processor revision (see "candelta" for why)
1135 # no delta for flag processor revision (see "candelta" for why)
1130 # not calling candelta since only one revision needs test, also to
1136 # not calling candelta since only one revision needs test, also to
1131 # avoid overhead fetching flags again.
1137 # avoid overhead fetching flags again.
1132 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1138 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1133 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1134
1140
1135 gather_debug = (
1141 gather_debug = (
1136 self._write_debug is not None or self._debug_info is not None
1142 self._write_debug is not None or self._debug_info is not None
1137 )
1143 )
1138 debug_search = self._write_debug is not None and self._debug_search
1144 debug_search = self._write_debug is not None and self._debug_search
1139
1145
1140 if gather_debug:
1146 if gather_debug:
1141 start = util.timer()
1147 start = util.timer()
1142
1148
1143 # count the number of different delta we tried (for debug purpose)
1149 # count the number of different delta we tried (for debug purpose)
1144 dbg_try_count = 0
1150 dbg_try_count = 0
1145 # count the number of "search round" we did. (for debug purpose)
1151 # count the number of "search round" we did. (for debug purpose)
1146 dbg_try_rounds = 0
1152 dbg_try_rounds = 0
1147 dbg_type = b'unknown'
1153 dbg_type = b'unknown'
1148
1154
1149 cachedelta = revinfo.cachedelta
1155 cachedelta = revinfo.cachedelta
1150 p1 = revinfo.p1
1156 p1 = revinfo.p1
1151 p2 = revinfo.p2
1157 p2 = revinfo.p2
1152 revlog = self.revlog
1158 revlog = self.revlog
1153
1159
1154 deltainfo = None
1160 deltainfo = None
1155 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1161 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1156
1162
1157 if gather_debug:
1163 if gather_debug:
1158 if p1r != nullrev:
1164 if p1r != nullrev:
1159 p1_chain_len = revlog._chaininfo(p1r)[0]
1165 p1_chain_len = revlog._chaininfo(p1r)[0]
1160 else:
1166 else:
1161 p1_chain_len = -1
1167 p1_chain_len = -1
1162 if p2r != nullrev:
1168 if p2r != nullrev:
1163 p2_chain_len = revlog._chaininfo(p2r)[0]
1169 p2_chain_len = revlog._chaininfo(p2r)[0]
1164 else:
1170 else:
1165 p2_chain_len = -1
1171 p2_chain_len = -1
1166 if debug_search:
1172 if debug_search:
1167 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1173 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1168 msg %= target_rev
1174 msg %= target_rev
1169 self._write_debug(msg)
1175 self._write_debug(msg)
1170
1176
1171 groups = _candidategroups(
1177 groups = _candidategroups(
1172 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1178 self.revlog,
1179 revinfo.textlen,
1180 p1r,
1181 p2r,
1182 cachedelta,
1173 )
1183 )
1174 candidaterevs = next(groups)
1184 candidaterevs = next(groups)
1175 while candidaterevs is not None:
1185 while candidaterevs is not None:
1176 dbg_try_rounds += 1
1186 dbg_try_rounds += 1
1177 if debug_search:
1187 if debug_search:
1178 prev = None
1188 prev = None
1179 if deltainfo is not None:
1189 if deltainfo is not None:
1180 prev = deltainfo.base
1190 prev = deltainfo.base
1181
1191
1182 if (
1192 if (
1183 cachedelta is not None
1193 cachedelta is not None
1184 and len(candidaterevs) == 1
1194 and len(candidaterevs) == 1
1185 and cachedelta[0] in candidaterevs
1195 and cachedelta[0] in candidaterevs
1186 ):
1196 ):
1187 round_type = b"cached-delta"
1197 round_type = b"cached-delta"
1188 elif p1 in candidaterevs or p2 in candidaterevs:
1198 elif p1 in candidaterevs or p2 in candidaterevs:
1189 round_type = b"parents"
1199 round_type = b"parents"
1190 elif prev is not None and all(c < prev for c in candidaterevs):
1200 elif prev is not None and all(c < prev for c in candidaterevs):
1191 round_type = b"refine-down"
1201 round_type = b"refine-down"
1192 elif prev is not None and all(c > prev for c in candidaterevs):
1202 elif prev is not None and all(c > prev for c in candidaterevs):
1193 round_type = b"refine-up"
1203 round_type = b"refine-up"
1194 else:
1204 else:
1195 round_type = b"search-down"
1205 round_type = b"search-down"
1196 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1206 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1197 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1207 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1198 self._write_debug(msg)
1208 self._write_debug(msg)
1199 nominateddeltas = []
1209 nominateddeltas = []
1200 if deltainfo is not None:
1210 if deltainfo is not None:
1201 if debug_search:
1211 if debug_search:
1202 msg = (
1212 msg = (
1203 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1213 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1204 )
1214 )
1205 msg %= (deltainfo.base, deltainfo.deltalen)
1215 msg %= (deltainfo.base, deltainfo.deltalen)
1206 self._write_debug(msg)
1216 self._write_debug(msg)
1207 # if we already found a good delta,
1217 # if we already found a good delta,
1208 # challenge it against refined candidates
1218 # challenge it against refined candidates
1209 nominateddeltas.append(deltainfo)
1219 nominateddeltas.append(deltainfo)
1210 for candidaterev in candidaterevs:
1220 for candidaterev in candidaterevs:
1211 if debug_search:
1221 if debug_search:
1212 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1222 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1213 msg %= candidaterev
1223 msg %= candidaterev
1214 self._write_debug(msg)
1224 self._write_debug(msg)
1215 candidate_type = None
1225 candidate_type = None
1216 if candidaterev == p1:
1226 if candidaterev == p1:
1217 candidate_type = b"p1"
1227 candidate_type = b"p1"
1218 elif candidaterev == p2:
1228 elif candidaterev == p2:
1219 candidate_type = b"p2"
1229 candidate_type = b"p2"
1220 elif self.revlog.issnapshot(candidaterev):
1230 elif self.revlog.issnapshot(candidaterev):
1221 candidate_type = b"snapshot-%d"
1231 candidate_type = b"snapshot-%d"
1222 candidate_type %= self.revlog.snapshotdepth(
1232 candidate_type %= self.revlog.snapshotdepth(
1223 candidaterev
1233 candidaterev
1224 )
1234 )
1225
1235
1226 if candidate_type is not None:
1236 if candidate_type is not None:
1227 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1237 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1228 msg %= candidate_type
1238 msg %= candidate_type
1229 self._write_debug(msg)
1239 self._write_debug(msg)
1230 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1240 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1231 msg %= self.revlog.length(candidaterev)
1241 msg %= self.revlog.length(candidaterev)
1232 self._write_debug(msg)
1242 self._write_debug(msg)
1233 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1243 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1234 msg %= self.revlog.deltaparent(candidaterev)
1244 msg %= self.revlog.deltaparent(candidaterev)
1235 self._write_debug(msg)
1245 self._write_debug(msg)
1236 if candidaterev in excluded_bases:
1246 if candidaterev in excluded_bases:
1237 if debug_search:
1247 if debug_search:
1238 msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n"
1248 msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n"
1239 self._write_debug(msg)
1249 self._write_debug(msg)
1240 continue
1250 continue
1241 if candidaterev >= target_rev:
1251 if candidaterev >= target_rev:
1242 if debug_search:
1252 if debug_search:
1243 msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n"
1253 msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n"
1244 self._write_debug(msg)
1254 self._write_debug(msg)
1245 continue
1255 continue
1246 dbg_try_count += 1
1256 dbg_try_count += 1
1247
1257
1248 if debug_search:
1258 if debug_search:
1249 delta_start = util.timer()
1259 delta_start = util.timer()
1250 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1260 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1251 if debug_search:
1261 if debug_search:
1252 delta_end = util.timer()
1262 delta_end = util.timer()
1253 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1263 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1254 msg %= delta_end - delta_start
1264 msg %= delta_end - delta_start
1255 self._write_debug(msg)
1265 self._write_debug(msg)
1256 if candidatedelta is not None:
1266 if candidatedelta is not None:
1257 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1267 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1258 if debug_search:
1268 if debug_search:
1259 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1269 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1260 msg %= candidatedelta.deltalen
1270 msg %= candidatedelta.deltalen
1261 self._write_debug(msg)
1271 self._write_debug(msg)
1262 nominateddeltas.append(candidatedelta)
1272 nominateddeltas.append(candidatedelta)
1263 elif debug_search:
1273 elif debug_search:
1264 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1274 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1265 msg %= candidatedelta.deltalen
1275 msg %= candidatedelta.deltalen
1266 self._write_debug(msg)
1276 self._write_debug(msg)
1267 elif debug_search:
1277 elif debug_search:
1268 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1278 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1269 self._write_debug(msg)
1279 self._write_debug(msg)
1270 if nominateddeltas:
1280 if nominateddeltas:
1271 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1281 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1272 if deltainfo is not None:
1282 if deltainfo is not None:
1273 candidaterevs = groups.send(deltainfo.base)
1283 candidaterevs = groups.send(deltainfo.base)
1274 else:
1284 else:
1275 candidaterevs = next(groups)
1285 candidaterevs = next(groups)
1276
1286
1277 if deltainfo is None:
1287 if deltainfo is None:
1278 dbg_type = b"full"
1288 dbg_type = b"full"
1279 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1289 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1280 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1290 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1281 dbg_type = b"snapshot"
1291 dbg_type = b"snapshot"
1282 else:
1292 else:
1283 dbg_type = b"delta"
1293 dbg_type = b"delta"
1284
1294
1285 if gather_debug:
1295 if gather_debug:
1286 end = util.timer()
1296 end = util.timer()
1287 used_cached = (
1297 used_cached = (
1288 cachedelta is not None
1298 cachedelta is not None
1289 and dbg_try_rounds == 1
1299 and dbg_try_rounds == 1
1290 and dbg_try_count == 1
1300 and dbg_try_count == 1
1291 and deltainfo.base == cachedelta[0]
1301 and deltainfo.base == cachedelta[0]
1292 )
1302 )
1293 dbg = {
1303 dbg = {
1294 'duration': end - start,
1304 'duration': end - start,
1295 'revision': target_rev,
1305 'revision': target_rev,
1296 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1306 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1297 'search_round_count': dbg_try_rounds,
1307 'search_round_count': dbg_try_rounds,
1298 'using-cached-base': used_cached,
1308 'using-cached-base': used_cached,
1299 'delta_try_count': dbg_try_count,
1309 'delta_try_count': dbg_try_count,
1300 'type': dbg_type,
1310 'type': dbg_type,
1301 'p1-chain-len': p1_chain_len,
1311 'p1-chain-len': p1_chain_len,
1302 'p2-chain-len': p2_chain_len,
1312 'p2-chain-len': p2_chain_len,
1303 }
1313 }
1304 if (
1314 if (
1305 deltainfo.snapshotdepth # pytype: disable=attribute-error
1315 deltainfo.snapshotdepth # pytype: disable=attribute-error
1306 is not None
1316 is not None
1307 ):
1317 ):
1308 dbg[
1318 dbg[
1309 'snapshot-depth'
1319 'snapshot-depth'
1310 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1320 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1311 else:
1321 else:
1312 dbg['snapshot-depth'] = 0
1322 dbg['snapshot-depth'] = 0
1313 target_revlog = b"UNKNOWN"
1323 target_revlog = b"UNKNOWN"
1314 target_type = self.revlog.target[0]
1324 target_type = self.revlog.target[0]
1315 target_key = self.revlog.target[1]
1325 target_key = self.revlog.target[1]
1316 if target_type == KIND_CHANGELOG:
1326 if target_type == KIND_CHANGELOG:
1317 target_revlog = b'CHANGELOG:'
1327 target_revlog = b'CHANGELOG:'
1318 elif target_type == KIND_MANIFESTLOG:
1328 elif target_type == KIND_MANIFESTLOG:
1319 target_revlog = b'MANIFESTLOG:'
1329 target_revlog = b'MANIFESTLOG:'
1320 if target_key:
1330 if target_key:
1321 target_revlog += b'%s:' % target_key
1331 target_revlog += b'%s:' % target_key
1322 elif target_type == KIND_FILELOG:
1332 elif target_type == KIND_FILELOG:
1323 target_revlog = b'FILELOG:'
1333 target_revlog = b'FILELOG:'
1324 if target_key:
1334 if target_key:
1325 target_revlog += b'%s:' % target_key
1335 target_revlog += b'%s:' % target_key
1326 dbg['target-revlog'] = target_revlog
1336 dbg['target-revlog'] = target_revlog
1327
1337
1328 if self._debug_info is not None:
1338 if self._debug_info is not None:
1329 self._debug_info.append(dbg)
1339 self._debug_info.append(dbg)
1330
1340
1331 if self._write_debug is not None:
1341 if self._write_debug is not None:
1332 msg = (
1342 msg = (
1333 b"DBG-DELTAS:"
1343 b"DBG-DELTAS:"
1334 b" %-12s"
1344 b" %-12s"
1335 b" rev=%d:"
1345 b" rev=%d:"
1336 b" delta-base=%d"
1346 b" delta-base=%d"
1337 b" is-cached=%d"
1347 b" is-cached=%d"
1338 b" - search-rounds=%d"
1348 b" - search-rounds=%d"
1339 b" try-count=%d"
1349 b" try-count=%d"
1340 b" - delta-type=%-6s"
1350 b" - delta-type=%-6s"
1341 b" snap-depth=%d"
1351 b" snap-depth=%d"
1342 b" - p1-chain-length=%d"
1352 b" - p1-chain-length=%d"
1343 b" p2-chain-length=%d"
1353 b" p2-chain-length=%d"
1344 b" - duration=%f"
1354 b" - duration=%f"
1345 b"\n"
1355 b"\n"
1346 )
1356 )
1347 msg %= (
1357 msg %= (
1348 dbg["target-revlog"],
1358 dbg["target-revlog"],
1349 dbg["revision"],
1359 dbg["revision"],
1350 dbg["delta-base"],
1360 dbg["delta-base"],
1351 dbg["using-cached-base"],
1361 dbg["using-cached-base"],
1352 dbg["search_round_count"],
1362 dbg["search_round_count"],
1353 dbg["delta_try_count"],
1363 dbg["delta_try_count"],
1354 dbg["type"],
1364 dbg["type"],
1355 dbg["snapshot-depth"],
1365 dbg["snapshot-depth"],
1356 dbg["p1-chain-len"],
1366 dbg["p1-chain-len"],
1357 dbg["p2-chain-len"],
1367 dbg["p2-chain-len"],
1358 dbg["duration"],
1368 dbg["duration"],
1359 )
1369 )
1360 self._write_debug(msg)
1370 self._write_debug(msg)
1361 return deltainfo
1371 return deltainfo
1362
1372
1363
1373
1364 def delta_compression(default_compression_header, deltainfo):
1374 def delta_compression(default_compression_header, deltainfo):
1365 """return (COMPRESSION_MODE, deltainfo)
1375 """return (COMPRESSION_MODE, deltainfo)
1366
1376
1367 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1377 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1368 compression.
1378 compression.
1369 """
1379 """
1370 h, d = deltainfo.data
1380 h, d = deltainfo.data
1371 compression_mode = COMP_MODE_INLINE
1381 compression_mode = COMP_MODE_INLINE
1372 if not h and not d:
1382 if not h and not d:
1373 # not data to store at all... declare them uncompressed
1383 # not data to store at all... declare them uncompressed
1374 compression_mode = COMP_MODE_PLAIN
1384 compression_mode = COMP_MODE_PLAIN
1375 elif not h:
1385 elif not h:
1376 t = d[0:1]
1386 t = d[0:1]
1377 if t == b'\0':
1387 if t == b'\0':
1378 compression_mode = COMP_MODE_PLAIN
1388 compression_mode = COMP_MODE_PLAIN
1379 elif t == default_compression_header:
1389 elif t == default_compression_header:
1380 compression_mode = COMP_MODE_DEFAULT
1390 compression_mode = COMP_MODE_DEFAULT
1381 elif h == b'u':
1391 elif h == b'u':
1382 # we have a more efficient way to declare uncompressed
1392 # we have a more efficient way to declare uncompressed
1383 h = b''
1393 h = b''
1384 compression_mode = COMP_MODE_PLAIN
1394 compression_mode = COMP_MODE_PLAIN
1385 deltainfo = drop_u_compression(deltainfo)
1395 deltainfo = drop_u_compression(deltainfo)
1386 return compression_mode, deltainfo
1396 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now