upstream/mercurial-mirror Commit - r50510:01ccb45b

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

11

import collections

11

import collections

12

import struct

12

import struct

13

14

# import stuff from node for others to import from revlog

14

# import stuff from node for others to import from revlog

15

from ..node import nullrev

15

from ..node import nullrev

16

from ..i18n import _

16

from ..i18n import _

17

from ..pycompat import getattr

17

from ..pycompat import getattr

18

19

from .constants import (

19

from .constants import (

20

COMP_MODE_DEFAULT,

20

COMP_MODE_DEFAULT,

21

COMP_MODE_INLINE,

21

COMP_MODE_INLINE,

22

COMP_MODE_PLAIN,

22

COMP_MODE_PLAIN,

23

KIND_CHANGELOG,

23

KIND_CHANGELOG,

24

KIND_FILELOG,

24

KIND_FILELOG,

25

KIND_MANIFESTLOG,

25

KIND_MANIFESTLOG,

26

REVIDX_ISCENSORED,

26

REVIDX_ISCENSORED,

27

REVIDX_RAWTEXT_CHANGING_FLAGS,

27

REVIDX_RAWTEXT_CHANGING_FLAGS,

28

)

28

)

29

30

from ..thirdparty import attr

30

from ..thirdparty import attr

31

32

from .. import (

32

from .. import (

33

error,

33

error,

34

mdiff,

34

mdiff,

35

util,

35

util,

36

)

36

)

37

38

from . import flagutil

38

from . import flagutil

39

40

# maximum <delta-chain-data>/<revision-text-length> ratio

40

# maximum <delta-chain-data>/<revision-text-length> ratio

41

LIMIT_DELTA2TEXT = 2

41

LIMIT_DELTA2TEXT = 2

42

43

44

class _testrevlog:

44

class _testrevlog:

45

"""minimalist fake revlog to use in doctests"""

45

"""minimalist fake revlog to use in doctests"""

46

47

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

47

def __init__(self, data, density=0.5, mingap=0, snapshot=()):

48

"""data is an list of revision payload boundaries"""

48

"""data is an list of revision payload boundaries"""

49

self._data = data

49

self._data = data

50

self._srdensitythreshold = density

50

self._srdensitythreshold = density

51

self._srmingapsize = mingap

51

self._srmingapsize = mingap

52

self._snapshot = set(snapshot)

52

self._snapshot = set(snapshot)

53

self.index = None

53

self.index = None

54

55

def start(self, rev):

55

def start(self, rev):

56

if rev == nullrev:

56

if rev == nullrev:

57

return 0

57

return 0

58

if rev == 0:

58

if rev == 0:

59

return 0

59

return 0

60

return self._data[rev - 1]

60

return self._data[rev - 1]

61

62

def end(self, rev):

62

def end(self, rev):

63

if rev == nullrev:

63

if rev == nullrev:

64

return 0

64

return 0

65

return self._data[rev]

65

return self._data[rev]

66

67

def length(self, rev):

67

def length(self, rev):

68

return self.end(rev) - self.start(rev)

68

return self.end(rev) - self.start(rev)

69

70

def __len__(self):

70

def __len__(self):

71

return len(self._data)

71

return len(self._data)

72

73

def issnapshot(self, rev):

73

def issnapshot(self, rev):

74

if rev == nullrev:

74

if rev == nullrev:

75

return True

75

return True

76

return rev in self._snapshot

76

return rev in self._snapshot

77

78

79

def slicechunk(revlog, revs, targetsize=None):

79

def slicechunk(revlog, revs, targetsize=None):

80

"""slice revs to reduce the amount of unrelated data to be read from disk.

80

"""slice revs to reduce the amount of unrelated data to be read from disk.

81

82

``revs`` is sliced into groups that should be read in one time.

82

``revs`` is sliced into groups that should be read in one time.

83

Assume that revs are sorted.

83

Assume that revs are sorted.

84

85

The initial chunk is sliced until the overall density (payload/chunks-span

85

The initial chunk is sliced until the overall density (payload/chunks-span

86

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

86

ratio) is above `revlog._srdensitythreshold`. No gap smaller than

87

`revlog._srmingapsize` is skipped.

87

`revlog._srmingapsize` is skipped.

88

89

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

89

If `targetsize` is set, no chunk larger than `targetsize` will be yield.

90

For consistency with other slicing choice, this limit won't go lower than

90

For consistency with other slicing choice, this limit won't go lower than

91

`revlog._srmingapsize`.

91

`revlog._srmingapsize`.

92

93

If individual revisions chunk are larger than this limit, they will still

93

If individual revisions chunk are larger than this limit, they will still

94

be raised individually.

94

be raised individually.

95

96

>>> data = [

96

>>> data = [

97

... 5, #00 (5)

97

... 5, #00 (5)

98

... 10, #01 (5)

98

... 10, #01 (5)

99

... 12, #02 (2)

99

... 12, #02 (2)

100

... 12, #03 (empty)

100

... 12, #03 (empty)

101

... 27, #04 (15)

101

... 27, #04 (15)

102

... 31, #05 (4)

102

... 31, #05 (4)

103

... 31, #06 (empty)

103

... 31, #06 (empty)

104

... 42, #07 (11)

104

... 42, #07 (11)

105

... 47, #08 (5)

105

... 47, #08 (5)

106

... 47, #09 (empty)

106

... 47, #09 (empty)

107

... 48, #10 (1)

107

... 48, #10 (1)

108

... 51, #11 (3)

108

... 51, #11 (3)

109

... 74, #12 (23)

109

... 74, #12 (23)

110

... 85, #13 (11)

110

... 85, #13 (11)

111

... 86, #14 (1)

111

... 86, #14 (1)

112

... 91, #15 (5)

112

... 91, #15 (5)

113

... ]

113

... ]

114

>>> revlog = _testrevlog(data, snapshot=range(16))

114

>>> revlog = _testrevlog(data, snapshot=range(16))

115

116

>>> list(slicechunk(revlog, list(range(16))))

116

>>> list(slicechunk(revlog, list(range(16))))

117

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

117

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

118

>>> list(slicechunk(revlog, [0, 15]))

118

>>> list(slicechunk(revlog, [0, 15]))

119

[[0], [15]]

119

[[0], [15]]

120

>>> list(slicechunk(revlog, [0, 11, 15]))

120

>>> list(slicechunk(revlog, [0, 11, 15]))

121

[[0], [11], [15]]

121

[[0], [11], [15]]

122

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

122

>>> list(slicechunk(revlog, [0, 11, 13, 15]))

123

[[0], [11, 13, 15]]

123

[[0], [11, 13, 15]]

124

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

124

>>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

125

[[1, 2], [5, 8, 10, 11], [14]]

125

[[1, 2], [5, 8, 10, 11], [14]]

126

127

Slicing with a maximum chunk size

127

Slicing with a maximum chunk size

128

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

128

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))

129

[[0], [11], [13], [15]]

129

[[0], [11], [13], [15]]

130

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

130

>>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))

131

[[0], [11], [13, 15]]

131

[[0], [11], [13, 15]]

132

133

Slicing involving nullrev

133

Slicing involving nullrev

134

>>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))

134

>>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))

135

[[-1, 0], [11], [13, 15]]

135

[[-1, 0], [11], [13, 15]]

136

>>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))

136

>>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))

137

[[-1], [13], [15]]

137

[[-1], [13], [15]]

138

"""

138

"""

139

if targetsize is not None:

139

if targetsize is not None:

140

targetsize = max(targetsize, revlog._srmingapsize)

140

targetsize = max(targetsize, revlog._srmingapsize)

141

# targetsize should not be specified when evaluating delta candidates:

141

# targetsize should not be specified when evaluating delta candidates:

142

# * targetsize is used to ensure we stay within specification when reading,

142

# * targetsize is used to ensure we stay within specification when reading,

143

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

143

densityslicing = getattr(revlog.index, 'slicechunktodensity', None)

144

if densityslicing is None:

144

if densityslicing is None:

145

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

145

densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)

146

for chunk in densityslicing(

146

for chunk in densityslicing(

147

revs, revlog._srdensitythreshold, revlog._srmingapsize

147

revs, revlog._srdensitythreshold, revlog._srmingapsize

148

):

148

):

149

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

149

for subchunk in _slicechunktosize(revlog, chunk, targetsize):

150

yield subchunk

150

yield subchunk

151

152

153

def _slicechunktosize(revlog, revs, targetsize=None):

153

def _slicechunktosize(revlog, revs, targetsize=None):

154

"""slice revs to match the target size

154

"""slice revs to match the target size

155

156

This is intended to be used on chunk that density slicing selected by that

156

This is intended to be used on chunk that density slicing selected by that

157

are still too large compared to the read garantee of revlog. This might

157

are still too large compared to the read garantee of revlog. This might

158

happens when "minimal gap size" interrupted the slicing or when chain are

158

happens when "minimal gap size" interrupted the slicing or when chain are

159

built in a way that create large blocks next to each other.

159

built in a way that create large blocks next to each other.

160

161

>>> data = [

161

>>> data = [

162

... 3, #0 (3)

162

... 3, #0 (3)

163

... 5, #1 (2)

163

... 5, #1 (2)

164

... 6, #2 (1)

164

... 6, #2 (1)

165

... 8, #3 (2)

165

... 8, #3 (2)

166

... 8, #4 (empty)

166

... 8, #4 (empty)

167

... 11, #5 (3)

167

... 11, #5 (3)

168

... 12, #6 (1)

168

... 12, #6 (1)

169

... 13, #7 (1)

169

... 13, #7 (1)

170

... 14, #8 (1)

170

... 14, #8 (1)

171

... ]

171

... ]

172

173

== All snapshots cases ==

173

== All snapshots cases ==

174

>>> revlog = _testrevlog(data, snapshot=range(9))

174

>>> revlog = _testrevlog(data, snapshot=range(9))

175

176

Cases where chunk is already small enough

176

Cases where chunk is already small enough

177

>>> list(_slicechunktosize(revlog, [0], 3))

177

>>> list(_slicechunktosize(revlog, [0], 3))

178

[[0]]

178

[[0]]

179

>>> list(_slicechunktosize(revlog, [6, 7], 3))

179

>>> list(_slicechunktosize(revlog, [6, 7], 3))

180

[[6, 7]]

180

[[6, 7]]

181

>>> list(_slicechunktosize(revlog, [0], None))

181

>>> list(_slicechunktosize(revlog, [0], None))

182

[[0]]

182

[[0]]

183

>>> list(_slicechunktosize(revlog, [6, 7], None))

183

>>> list(_slicechunktosize(revlog, [6, 7], None))

184

[[6, 7]]

184

[[6, 7]]

185

186

cases where we need actual slicing

186

cases where we need actual slicing

187

>>> list(_slicechunktosize(revlog, [0, 1], 3))

187

>>> list(_slicechunktosize(revlog, [0, 1], 3))

188

[[0], [1]]

188

[[0], [1]]

189

>>> list(_slicechunktosize(revlog, [1, 3], 3))

189

>>> list(_slicechunktosize(revlog, [1, 3], 3))

190

[[1], [3]]

190

[[1], [3]]

191

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

191

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

192

[[1, 2], [3]]

192

[[1, 2], [3]]

193

>>> list(_slicechunktosize(revlog, [3, 5], 3))

193

>>> list(_slicechunktosize(revlog, [3, 5], 3))

194

[[3], [5]]

194

[[3], [5]]

195

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

195

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

196

[[3], [5]]

196

[[3], [5]]

197

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

197

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

198

[[5], [6, 7, 8]]

198

[[5], [6, 7, 8]]

199

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

199

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

200

[[0], [1, 2], [3], [5], [6, 7, 8]]

200

[[0], [1, 2], [3], [5], [6, 7, 8]]

201

202

Case with too large individual chunk (must return valid chunk)

202

Case with too large individual chunk (must return valid chunk)

203

>>> list(_slicechunktosize(revlog, [0, 1], 2))

203

>>> list(_slicechunktosize(revlog, [0, 1], 2))

204

[[0], [1]]

204

[[0], [1]]

205

>>> list(_slicechunktosize(revlog, [1, 3], 1))

205

>>> list(_slicechunktosize(revlog, [1, 3], 1))

206

[[1], [3]]

206

[[1], [3]]

207

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

207

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

208

[[3], [5]]

208

[[3], [5]]

209

210

== No Snapshot cases ==

210

== No Snapshot cases ==

211

>>> revlog = _testrevlog(data)

211

>>> revlog = _testrevlog(data)

212

213

Cases where chunk is already small enough

213

Cases where chunk is already small enough

214

>>> list(_slicechunktosize(revlog, [0], 3))

214

>>> list(_slicechunktosize(revlog, [0], 3))

215

[[0]]

215

[[0]]

216

>>> list(_slicechunktosize(revlog, [6, 7], 3))

216

>>> list(_slicechunktosize(revlog, [6, 7], 3))

217

[[6, 7]]

217

[[6, 7]]

218

>>> list(_slicechunktosize(revlog, [0], None))

218

>>> list(_slicechunktosize(revlog, [0], None))

219

[[0]]

219

[[0]]

220

>>> list(_slicechunktosize(revlog, [6, 7], None))

220

>>> list(_slicechunktosize(revlog, [6, 7], None))

221

[[6, 7]]

221

[[6, 7]]

222

223

cases where we need actual slicing

223

cases where we need actual slicing

224

>>> list(_slicechunktosize(revlog, [0, 1], 3))

224

>>> list(_slicechunktosize(revlog, [0, 1], 3))

225

[[0], [1]]

225

[[0], [1]]

226

>>> list(_slicechunktosize(revlog, [1, 3], 3))

226

>>> list(_slicechunktosize(revlog, [1, 3], 3))

227

[[1], [3]]

227

[[1], [3]]

228

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

228

>>> list(_slicechunktosize(revlog, [1, 2, 3], 3))

229

[[1], [2, 3]]

229

[[1], [2, 3]]

230

>>> list(_slicechunktosize(revlog, [3, 5], 3))

230

>>> list(_slicechunktosize(revlog, [3, 5], 3))

231

[[3], [5]]

231

[[3], [5]]

232

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

232

>>> list(_slicechunktosize(revlog, [3, 4, 5], 3))

233

[[3], [4, 5]]

233

[[3], [4, 5]]

234

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

234

>>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))

235

[[5], [6, 7, 8]]

235

[[5], [6, 7, 8]]

236

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

236

>>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))

237

[[0], [1, 2], [3], [5], [6, 7, 8]]

237

[[0], [1, 2], [3], [5], [6, 7, 8]]

238

239

Case with too large individual chunk (must return valid chunk)

239

Case with too large individual chunk (must return valid chunk)

240

>>> list(_slicechunktosize(revlog, [0, 1], 2))

240

>>> list(_slicechunktosize(revlog, [0, 1], 2))

241

[[0], [1]]

241

[[0], [1]]

242

>>> list(_slicechunktosize(revlog, [1, 3], 1))

242

>>> list(_slicechunktosize(revlog, [1, 3], 1))

243

[[1], [3]]

243

[[1], [3]]

244

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

244

>>> list(_slicechunktosize(revlog, [3, 4, 5], 2))

245

[[3], [5]]

245

[[3], [5]]

246

247

== mixed case ==

247

== mixed case ==

248

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

248

>>> revlog = _testrevlog(data, snapshot=[0, 1, 2])

249

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

249

>>> list(_slicechunktosize(revlog, list(range(9)), 5))

250

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

250

[[0, 1], [2], [3, 4, 5], [6, 7, 8]]

251

"""

251

"""

252

assert targetsize is None or 0 <= targetsize

252

assert targetsize is None or 0 <= targetsize

253

startdata = revlog.start(revs[0])

253

startdata = revlog.start(revs[0])

254

enddata = revlog.end(revs[-1])

254

enddata = revlog.end(revs[-1])

255

fullspan = enddata - startdata

255

fullspan = enddata - startdata

256

if targetsize is None or fullspan <= targetsize:

256

if targetsize is None or fullspan <= targetsize:

257

yield revs

257

yield revs

258

return

258

return

259

260

startrevidx = 0

260

startrevidx = 0

261

endrevidx = 1

261

endrevidx = 1

262

iterrevs = enumerate(revs)

262

iterrevs = enumerate(revs)

263

next(iterrevs) # skip first rev.

263

next(iterrevs) # skip first rev.

264

# first step: get snapshots out of the way

264

# first step: get snapshots out of the way

265

for idx, r in iterrevs:

265

for idx, r in iterrevs:

266

span = revlog.end(r) - startdata

266

span = revlog.end(r) - startdata

267

snapshot = revlog.issnapshot(r)

267

snapshot = revlog.issnapshot(r)

268

if span <= targetsize and snapshot:

268

if span <= targetsize and snapshot:

269

endrevidx = idx + 1

269

endrevidx = idx + 1

270

else:

270

else:

271

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

271

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

272

if chunk:

272

if chunk:

273

yield chunk

273

yield chunk

274

startrevidx = idx

274

startrevidx = idx

275

startdata = revlog.start(r)

275

startdata = revlog.start(r)

276

endrevidx = idx + 1

276

endrevidx = idx + 1

277

if not snapshot:

277

if not snapshot:

278

break

278

break

279

280

# for the others, we use binary slicing to quickly converge toward valid

280

# for the others, we use binary slicing to quickly converge toward valid

281

# chunks (otherwise, we might end up looking for start/end of many

281

# chunks (otherwise, we might end up looking for start/end of many

282

# revisions). This logic is not looking for the perfect slicing point, it

282

# revisions). This logic is not looking for the perfect slicing point, it

283

# focuses on quickly converging toward valid chunks.

283

# focuses on quickly converging toward valid chunks.

284

nbitem = len(revs)

284

nbitem = len(revs)

285

while (enddata - startdata) > targetsize:

285

while (enddata - startdata) > targetsize:

286

endrevidx = nbitem

286

endrevidx = nbitem

287

if nbitem - startrevidx <= 1:

287

if nbitem - startrevidx <= 1:

288

break # protect against individual chunk larger than limit

288

break # protect against individual chunk larger than limit

289

localenddata = revlog.end(revs[endrevidx - 1])

289

localenddata = revlog.end(revs[endrevidx - 1])

290

span = localenddata - startdata

290

span = localenddata - startdata

291

while span > targetsize:

291

while span > targetsize:

292

if endrevidx - startrevidx <= 1:

292

if endrevidx - startrevidx <= 1:

293

break # protect against individual chunk larger than limit

293

break # protect against individual chunk larger than limit

294

endrevidx -= (endrevidx - startrevidx) // 2

294

endrevidx -= (endrevidx - startrevidx) // 2

295

localenddata = revlog.end(revs[endrevidx - 1])

295

localenddata = revlog.end(revs[endrevidx - 1])

296

span = localenddata - startdata

296

span = localenddata - startdata

297

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

297

chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)

298

if chunk:

298

if chunk:

299

yield chunk

299

yield chunk

300

startrevidx = endrevidx

300

startrevidx = endrevidx

301

startdata = revlog.start(revs[startrevidx])

301

startdata = revlog.start(revs[startrevidx])

302

303

chunk = _trimchunk(revlog, revs, startrevidx)

303

chunk = _trimchunk(revlog, revs, startrevidx)

304

if chunk:

304

if chunk:

305

yield chunk

305

yield chunk

306

307

308

def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):

308

def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):

309

"""slice revs to reduce the amount of unrelated data to be read from disk.

309

"""slice revs to reduce the amount of unrelated data to be read from disk.

310

311

``revs`` is sliced into groups that should be read in one time.

311

``revs`` is sliced into groups that should be read in one time.

312

Assume that revs are sorted.

312

Assume that revs are sorted.

313

314

The initial chunk is sliced until the overall density (payload/chunks-span

314

The initial chunk is sliced until the overall density (payload/chunks-span

315

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

315

ratio) is above `targetdensity`. No gap smaller than `mingapsize` is

316

skipped.

316

skipped.

317

318

>>> revlog = _testrevlog([

318

>>> revlog = _testrevlog([

319

... 5, #00 (5)

319

... 5, #00 (5)

320

... 10, #01 (5)

320

... 10, #01 (5)

321

... 12, #02 (2)

321

... 12, #02 (2)

322

... 12, #03 (empty)

322

... 12, #03 (empty)

323

... 27, #04 (15)

323

... 27, #04 (15)

324

... 31, #05 (4)

324

... 31, #05 (4)

325

... 31, #06 (empty)

325

... 31, #06 (empty)

326

... 42, #07 (11)

326

... 42, #07 (11)

327

... 47, #08 (5)

327

... 47, #08 (5)

328

... 47, #09 (empty)

328

... 47, #09 (empty)

329

... 48, #10 (1)

329

... 48, #10 (1)

330

... 51, #11 (3)

330

... 51, #11 (3)

331

... 74, #12 (23)

331

... 74, #12 (23)

332

... 85, #13 (11)

332

... 85, #13 (11)

333

... 86, #14 (1)

333

... 86, #14 (1)

334

... 91, #15 (5)

334

... 91, #15 (5)

335

... ])

335

... ])

336

337

>>> list(_slicechunktodensity(revlog, list(range(16))))

337

>>> list(_slicechunktodensity(revlog, list(range(16))))

338

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

338

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]

339

>>> list(_slicechunktodensity(revlog, [0, 15]))

339

>>> list(_slicechunktodensity(revlog, [0, 15]))

340

[[0], [15]]

340

[[0], [15]]

341

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

341

>>> list(_slicechunktodensity(revlog, [0, 11, 15]))

342

[[0], [11], [15]]

342

[[0], [11], [15]]

343

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

343

>>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))

344

[[0], [11, 13, 15]]

344

[[0], [11, 13, 15]]

345

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

345

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))

346

[[1, 2], [5, 8, 10, 11], [14]]

346

[[1, 2], [5, 8, 10, 11], [14]]

347

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

347

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

348

... mingapsize=20))

348

... mingapsize=20))

349

[[1, 2, 3, 5, 8, 10, 11], [14]]

349

[[1, 2, 3, 5, 8, 10, 11], [14]]

350

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

350

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

351

... targetdensity=0.95))

351

... targetdensity=0.95))

352

[[1, 2], [5], [8, 10, 11], [14]]

352

[[1, 2], [5], [8, 10, 11], [14]]

353

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

353

>>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],

354

... targetdensity=0.95, mingapsize=12))

354

... targetdensity=0.95, mingapsize=12))

355

[[1, 2], [5, 8, 10, 11], [14]]

355

[[1, 2], [5, 8, 10, 11], [14]]

356

"""

356

"""

357

start = revlog.start

357

start = revlog.start

358

length = revlog.length

358

length = revlog.length

359

360

if len(revs) <= 1:

360

if len(revs) <= 1:

361

yield revs

361

yield revs

362

return

362

return

363

364

deltachainspan = segmentspan(revlog, revs)

364

deltachainspan = segmentspan(revlog, revs)

365

366

if deltachainspan < mingapsize:

366

if deltachainspan < mingapsize:

367

yield revs

367

yield revs

368

return

368

return

369

370

readdata = deltachainspan

370

readdata = deltachainspan

371

chainpayload = sum(length(r) for r in revs)

371

chainpayload = sum(length(r) for r in revs)

372

373

if deltachainspan:

373

if deltachainspan:

374

density = chainpayload / float(deltachainspan)

374

density = chainpayload / float(deltachainspan)

375

else:

375

else:

376

density = 1.0

376

density = 1.0

377

378

if density >= targetdensity:

378

if density >= targetdensity:

379

yield revs

379

yield revs

380

return

380

return

381

382

# Store the gaps in a heap to have them sorted by decreasing size

382

# Store the gaps in a heap to have them sorted by decreasing size

383

gaps = []

383

gaps = []

384

prevend = None

384

prevend = None

385

for i, rev in enumerate(revs):

385

for i, rev in enumerate(revs):

386

revstart = start(rev)

386

revstart = start(rev)

387

revlen = length(rev)

387

revlen = length(rev)

388

389

# Skip empty revisions to form larger holes

389

# Skip empty revisions to form larger holes

390

if revlen == 0:

390

if revlen == 0:

391

continue

391

continue

392

393

if prevend is not None:

393

if prevend is not None:

394

gapsize = revstart - prevend

394

gapsize = revstart - prevend

395

# only consider holes that are large enough

395

# only consider holes that are large enough

396

if gapsize > mingapsize:

396

if gapsize > mingapsize:

397

gaps.append((gapsize, i))

397

gaps.append((gapsize, i))

398

399

prevend = revstart + revlen

399

prevend = revstart + revlen

400

# sort the gaps to pop them from largest to small

400

# sort the gaps to pop them from largest to small

401

gaps.sort()

401

gaps.sort()

402

403

# Collect the indices of the largest holes until the density is acceptable

403

# Collect the indices of the largest holes until the density is acceptable

404

selected = []

404

selected = []

405

while gaps and density < targetdensity:

405

while gaps and density < targetdensity:

406

gapsize, gapidx = gaps.pop()

406

gapsize, gapidx = gaps.pop()

407

408

selected.append(gapidx)

408

selected.append(gapidx)

409

410

# the gap sizes are stored as negatives to be sorted decreasingly

410

# the gap sizes are stored as negatives to be sorted decreasingly

411

# by the heap

411

# by the heap

412

readdata -= gapsize

412

readdata -= gapsize

413

if readdata > 0:

413

if readdata > 0:

414

density = chainpayload / float(readdata)

414

density = chainpayload / float(readdata)

415

else:

415

else:

416

density = 1.0

416

density = 1.0

417

selected.sort()

417

selected.sort()

418

419

# Cut the revs at collected indices

419

# Cut the revs at collected indices

420

previdx = 0

420

previdx = 0

421

for idx in selected:

421

for idx in selected:

422

423

chunk = _trimchunk(revlog, revs, previdx, idx)

423

chunk = _trimchunk(revlog, revs, previdx, idx)

424

if chunk:

424

if chunk:

425

yield chunk

425

yield chunk

426

427

previdx = idx

427

previdx = idx

428

429

chunk = _trimchunk(revlog, revs, previdx)

429

chunk = _trimchunk(revlog, revs, previdx)

430

if chunk:

430

if chunk:

431

yield chunk

431

yield chunk

432

433

434

def _trimchunk(revlog, revs, startidx, endidx=None):

434

def _trimchunk(revlog, revs, startidx, endidx=None):

435

"""returns revs[startidx:endidx] without empty trailing revs

435

"""returns revs[startidx:endidx] without empty trailing revs

436

437

Doctest Setup

437

Doctest Setup

438

>>> revlog = _testrevlog([

438

>>> revlog = _testrevlog([

439

... 5, #0

439

... 5, #0

440

... 10, #1

440

... 10, #1

441

... 12, #2

441

... 12, #2

442

... 12, #3 (empty)

442

... 12, #3 (empty)

443

... 17, #4

443

... 17, #4

444

... 21, #5

444

... 21, #5

445

... 21, #6 (empty)

445

... 21, #6 (empty)

446

... ])

446

... ])

447

448

Contiguous cases:

448

Contiguous cases:

449

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

449

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)

450

[0, 1, 2, 3, 4, 5]

450

[0, 1, 2, 3, 4, 5]

451

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

451

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)

452

[0, 1, 2, 3, 4]

452

[0, 1, 2, 3, 4]

453

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

453

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)

454

[0, 1, 2]

454

[0, 1, 2]

455

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

455

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)

456

[2]

456

[2]

457

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

457

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)

458

[3, 4, 5]

458

[3, 4, 5]

459

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

459

>>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)

460

[3, 4]

460

[3, 4]

461

462

Discontiguous cases:

462

Discontiguous cases:

463

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

463

>>> _trimchunk(revlog, [1, 3, 5, 6], 0)

464

[1, 3, 5]

464

[1, 3, 5]

465

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

465

>>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)

466

[1]

466

[1]

467

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

467

>>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)

468

[3, 5]

468

[3, 5]

469

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

469

>>> _trimchunk(revlog, [1, 3, 5, 6], 1)

470

[3, 5]

470

[3, 5]

471

"""

471

"""

472

length = revlog.length

472

length = revlog.length

473

474

if endidx is None:

474

if endidx is None:

475

endidx = len(revs)

475

endidx = len(revs)

476

477

# If we have a non-emtpy delta candidate, there are nothing to trim

477

# If we have a non-emtpy delta candidate, there are nothing to trim

478

if revs[endidx - 1] < len(revlog):

478

if revs[endidx - 1] < len(revlog):

479

# Trim empty revs at the end, except the very first revision of a chain

479

# Trim empty revs at the end, except the very first revision of a chain

480

while (

480

while (

481

endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0

481

endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0

482

):

482

):

483

endidx -= 1

483

endidx -= 1

484

485

return revs[startidx:endidx]

485

return revs[startidx:endidx]

486

487

488

def segmentspan(revlog, revs):

488

def segmentspan(revlog, revs):

489

"""Get the byte span of a segment of revisions

489

"""Get the byte span of a segment of revisions

490

491

revs is a sorted array of revision numbers

491

revs is a sorted array of revision numbers

492

493

>>> revlog = _testrevlog([

493

>>> revlog = _testrevlog([

494

... 5, #0

494

... 5, #0

495

... 10, #1

495

... 10, #1

496

... 12, #2

496

... 12, #2

497

... 12, #3 (empty)

497

... 12, #3 (empty)

498

... 17, #4

498

... 17, #4

499

... ])

499

... ])

500

501

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

501

>>> segmentspan(revlog, [0, 1, 2, 3, 4])

502

17

502

17

503

>>> segmentspan(revlog, [0, 4])

503

>>> segmentspan(revlog, [0, 4])

504

17

504

17

505

>>> segmentspan(revlog, [3, 4])

505

>>> segmentspan(revlog, [3, 4])

506

5

506

5

507

>>> segmentspan(revlog, [1, 2, 3,])

507

>>> segmentspan(revlog, [1, 2, 3,])

508

7

508

7

509

>>> segmentspan(revlog, [1, 3])

509

>>> segmentspan(revlog, [1, 3])

510

7

510

7

511

"""

511

"""

512

if not revs:

512

if not revs:

513

return 0

513

return 0

514

end = revlog.end(revs[-1])

514

end = revlog.end(revs[-1])

515

return end - revlog.start(revs[0])

515

return end - revlog.start(revs[0])

516

517

518

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

518

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):

519

"""build full text from a (base, delta) pair and other metadata"""

519

"""build full text from a (base, delta) pair and other metadata"""

520

# special case deltas which replace entire base; no need to decode

520

# special case deltas which replace entire base; no need to decode

521

# base revision. this neatly avoids censored bases, which throw when

521

# base revision. this neatly avoids censored bases, which throw when

522

# they're decoded.

522

# they're decoded.

523

hlen = struct.calcsize(b">lll")

523

hlen = struct.calcsize(b">lll")

524

if delta[:hlen] == mdiff.replacediffheader(

524

if delta[:hlen] == mdiff.replacediffheader(

525

revlog.rawsize(baserev), len(delta) - hlen

525

revlog.rawsize(baserev), len(delta) - hlen

526

):

526

):

527

fulltext = delta[hlen:]

527

fulltext = delta[hlen:]

528

else:

528

else:

529

# deltabase is rawtext before changed by flag processors, which is

529

# deltabase is rawtext before changed by flag processors, which is

530

# equivalent to non-raw text

530

# equivalent to non-raw text

531

basetext = revlog.revision(baserev, _df=fh)

531

basetext = revlog.revision(baserev, _df=fh)

532

fulltext = mdiff.patch(basetext, delta)

532

fulltext = mdiff.patch(basetext, delta)

533

534

try:

534

try:

535

validatehash = flagutil.processflagsraw(revlog, fulltext, flags)

535

validatehash = flagutil.processflagsraw(revlog, fulltext, flags)

536

if validatehash:

536

if validatehash:

537

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

537

revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)

538

if flags & REVIDX_ISCENSORED:

538

if flags & REVIDX_ISCENSORED:

539

raise error.StorageError(

539

raise error.StorageError(

540

_(b'node %s is not censored') % expectednode

540

_(b'node %s is not censored') % expectednode

541

)

541

)

542

except error.CensoredNodeError:

542

except error.CensoredNodeError:

543

# must pass the censored index flag to add censored revisions

543

# must pass the censored index flag to add censored revisions

544

if not flags & REVIDX_ISCENSORED:

544

if not flags & REVIDX_ISCENSORED:

545

raise

545

raise

546

return fulltext

546

return fulltext

547

548

549

@attr.s(slots=True, frozen=True)

549

@attr.s(slots=True, frozen=True)

550

class _deltainfo:

550

class _deltainfo:

551

distance = attr.ib()

551

distance = attr.ib()

552

deltalen = attr.ib()

552

deltalen = attr.ib()

553

data = attr.ib()

553

data = attr.ib()

554

base = attr.ib()

554

base = attr.ib()

555

chainbase = attr.ib()

555

chainbase = attr.ib()

556

chainlen = attr.ib()

556

chainlen = attr.ib()

557

compresseddeltalen = attr.ib()

557

compresseddeltalen = attr.ib()

558

snapshotdepth = attr.ib()

558

snapshotdepth = attr.ib()

559

560

561

def drop_u_compression(delta):

561

def drop_u_compression(delta):

562

"""turn into a "u" (no-compression) into no-compression without header

562

"""turn into a "u" (no-compression) into no-compression without header

563

564

This is useful for revlog format that has better compression method.

564

This is useful for revlog format that has better compression method.

565

"""

565

"""

566

assert delta.data[0] == b'u', delta.data[0]

566

assert delta.data[0] == b'u', delta.data[0]

567

return _deltainfo(

567

return _deltainfo(

568

delta.distance,

568

delta.distance,

569

delta.deltalen - 1,

569

delta.deltalen - 1,

570

(b'', delta.data[1]),

570

(b'', delta.data[1]),

571

delta.base,

571

delta.base,

572

delta.chainbase,

572

delta.chainbase,

573

delta.chainlen,

573

delta.chainlen,

574

delta.compresseddeltalen,

574

delta.compresseddeltalen,

575

delta.snapshotdepth,

575

delta.snapshotdepth,

576

)

576

)

577

578

579

def isgooddeltainfo(revlog, deltainfo, revinfo):

579

def isgooddeltainfo(revlog, deltainfo, revinfo):

580

"""Returns True if the given delta is good. Good means that it is within

580

"""Returns True if the given delta is good. Good means that it is within

581

the disk span, disk size, and chain length bounds that we know to be

581

the disk span, disk size, and chain length bounds that we know to be

582

performant."""

582

performant."""

583

if deltainfo is None:

583

if deltainfo is None:

584

return False

584

return False

585

586

# - 'deltainfo.distance' is the distance from the base revision --

586

# - 'deltainfo.distance' is the distance from the base revision --

587

# bounding it limits the amount of I/O we need to do.

587

# bounding it limits the amount of I/O we need to do.

588

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

588

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

589

# deltas we need to apply -- bounding it limits the amount of CPU

589

# deltas we need to apply -- bounding it limits the amount of CPU

590

# we consume.

590

# we consume.

591

592

textlen = revinfo.textlen

592

textlen = revinfo.textlen

593

defaultmax = textlen * 4

593

defaultmax = textlen * 4

594

maxdist = revlog._maxdeltachainspan

594

maxdist = revlog._maxdeltachainspan

595

if not maxdist:

595

if not maxdist:

596

maxdist = deltainfo.distance # ensure the conditional pass

596

maxdist = deltainfo.distance # ensure the conditional pass

597

maxdist = max(maxdist, defaultmax)

597

maxdist = max(maxdist, defaultmax)

598

599

# Bad delta from read span:

599

# Bad delta from read span:

600

#

600

#

601

# If the span of data read is larger than the maximum allowed.

601

# If the span of data read is larger than the maximum allowed.

602

#

602

#

603

# In the sparse-revlog case, we rely on the associated "sparse reading"

603

# In the sparse-revlog case, we rely on the associated "sparse reading"

604

# to avoid issue related to the span of data. In theory, it would be

604

# to avoid issue related to the span of data. In theory, it would be

605

# possible to build pathological revlog where delta pattern would lead

605

# possible to build pathological revlog where delta pattern would lead

606

# to too many reads. However, they do not happen in practice at all. So

606

# to too many reads. However, they do not happen in practice at all. So

607

# we skip the span check entirely.

607

# we skip the span check entirely.

608

if not revlog._sparserevlog and maxdist < deltainfo.distance:

608

if not revlog._sparserevlog and maxdist < deltainfo.distance:

609

return False

609

return False

610

611

# Bad delta from new delta size:

611

# Bad delta from new delta size:

612

#

612

#

613

# If the delta size is larger than the target text, storing the

613

# If the delta size is larger than the target text, storing the

614

# delta will be inefficient.

614

# delta will be inefficient.

615

if textlen < deltainfo.deltalen:

615

if textlen < deltainfo.deltalen:

616

return False

616

return False

617

618

# Bad delta from cumulated payload size:

618

# Bad delta from cumulated payload size:

619

#

619

#

620

# If the sum of delta get larger than K * target text length.

620

# If the sum of delta get larger than K * target text length.

621

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

621

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

622

return False

622

return False

623

624

# Bad delta from chain length:

624

# Bad delta from chain length:

625

#

625

#

626

# If the number of delta in the chain gets too high.

626

# If the number of delta in the chain gets too high.

627

if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:

627

if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:

628

return False

628

return False

629

630

# bad delta from intermediate snapshot size limit

630

# bad delta from intermediate snapshot size limit

631

#

631

#

632

# If an intermediate snapshot size is higher than the limit. The

632

# If an intermediate snapshot size is higher than the limit. The

633

# limit exist to prevent endless chain of intermediate delta to be

633

# limit exist to prevent endless chain of intermediate delta to be

634

# created.

634

# created.

635

if (

635

if (

636

deltainfo.snapshotdepth is not None

636

deltainfo.snapshotdepth is not None

637

and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen

637

and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen

638

):

638

):

639

return False

639

return False

640

641

# bad delta if new intermediate snapshot is larger than the previous

641

# bad delta if new intermediate snapshot is larger than the previous

642

# snapshot

642

# snapshot

643

if (

643

if (

644

deltainfo.snapshotdepth

644

deltainfo.snapshotdepth

645

and revlog.length(deltainfo.base) < deltainfo.deltalen

645

and revlog.length(deltainfo.base) < deltainfo.deltalen

646

):

646

):

647

return False

647

return False

648

649

return True

649

return True

650

651

652

# If a revision's full text is that much bigger than a base candidate full

652

# If a revision's full text is that much bigger than a base candidate full

653

# text's, it is very unlikely that it will produce a valid delta. We no longer

653

# text's, it is very unlikely that it will produce a valid delta. We no longer

654

# consider these candidates.

654

# consider these candidates.

655

LIMIT_BASE2TEXT = 500

655

LIMIT_BASE2TEXT = 500

656

657

658

def _candidategroups(

658

def _candidategroups(

659

revlog,

659

revlog,

660

textlen,

660

textlen,

661

p1,

661

p1,

662

p2,

662

p2,

663

cachedelta,

663

cachedelta,

664

excluded_bases=None,

664

excluded_bases=None,

665

target_rev=None,

665

target_rev=None,

666

):

666

):

667

"""Provides group of revision to be tested as delta base

667

"""Provides group of revision to be tested as delta base

668

669

This top level function focus on emitting groups with unique and worthwhile

669

This top level function focus on emitting groups with unique and worthwhile

670

content. See _raw_candidate_groups for details about the group order.

670

content. See _raw_candidate_groups for details about the group order.

671

"""

671

"""

672

# should we try to build a delta?

672

# should we try to build a delta?

673

if not (len(revlog) and revlog._storedeltachains):

673

if not (len(revlog) and revlog._storedeltachains):

674

yield None

674

yield None

675

return

675

return

676

677

deltalength = revlog.length

677

deltalength = revlog.length

678

deltaparent = revlog.deltaparent

678

deltaparent = revlog.deltaparent

679

sparse = revlog._sparserevlog

679

sparse = revlog._sparserevlog

680

good = None

680

good = None

681

682

deltas_limit = textlen * LIMIT_DELTA2TEXT

682

deltas_limit = textlen * LIMIT_DELTA2TEXT

683

684

tested = {nullrev}

684

tested = {nullrev}

685

candidates = _refinedgroups(

685

candidates = _refinedgroups(

686

revlog,

686

revlog,

687

p1,

687

p1,

688

p2,

688

p2,

689

cachedelta,

689

cachedelta,

690

)

690

)

691

while True:

691

while True:

692

temptative = candidates.send(good)

692

temptative = candidates.send(good)

693

if temptative is None:

693

if temptative is None:

694

break

694

break

695

group = []

695

group = []

696

for rev in temptative:

696

for rev in temptative:

697

# skip over empty delta (no need to include them in a chain)

697

# skip over empty delta (no need to include them in a chain)

698

while revlog._generaldelta and not (

698

while revlog._generaldelta and not (

699

rev == nullrev or rev in tested or deltalength(rev)

699

rev == nullrev or rev in tested or deltalength(rev)

700

):

700

):

701

tested.add(rev)

701

tested.add(rev)

702

rev = deltaparent(rev)

702

rev = deltaparent(rev)

703

# no need to try a delta against nullrev, this will be done as a

703

# no need to try a delta against nullrev, this will be done as a

704

# last resort.

704

# last resort.

705

if rev == nullrev:

705

if rev == nullrev:

706

continue

706

continue

707

# filter out revision we tested already

707

# filter out revision we tested already

708

if rev in tested:

708

if rev in tested:

709

continue

709

continue

710

tested.add(rev)

710

tested.add(rev)

711

# an higher authority deamed the base unworthy (e.g. censored)

711

# an higher authority deamed the base unworthy (e.g. censored)

712

if excluded_bases is not None and rev in excluded_bases:

712

if excluded_bases is not None and rev in excluded_bases:

713

continue

713

continue

714

# We are in some recomputation cases and that rev is too high in

714

# We are in some recomputation cases and that rev is too high in

715

# the revlog

715

# the revlog

716

if target_rev is not None and rev >= target_rev:

716

if target_rev is not None and rev >= target_rev:

717

continue

717

continue

718

# filter out delta base that will never produce good delta

718

# filter out delta base that will never produce good delta

719

if deltas_limit < revlog.length(rev):

719

if deltas_limit < revlog.length(rev):

720

continue

720

continue

721

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

721

if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):

722

continue

722

continue

723

# no delta for rawtext-changing revs (see "candelta" for why)

723

# no delta for rawtext-changing revs (see "candelta" for why)

724

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

724

if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

725

continue

725

continue

726

727

# If we reach here, we are about to build and test a delta.

727

# If we reach here, we are about to build and test a delta.

728

# The delta building process will compute the chaininfo in all

728

# The delta building process will compute the chaininfo in all

729

# case, since that computation is cached, it is fine to access it

729

# case, since that computation is cached, it is fine to access it

730

# here too.

730

# here too.

731

chainlen, chainsize = revlog._chaininfo(rev)

731

chainlen, chainsize = revlog._chaininfo(rev)

732

# if chain will be too long, skip base

732

# if chain will be too long, skip base

733

if revlog._maxchainlen and chainlen >= revlog._maxchainlen:

733

if revlog._maxchainlen and chainlen >= revlog._maxchainlen:

734

continue

734

continue

735

# if chain already have too much data, skip base

735

# if chain already have too much data, skip base

736

if deltas_limit < chainsize:

736

if deltas_limit < chainsize:

737

continue

737

continue

738

if sparse and revlog.upperboundcomp is not None:

738

if sparse and revlog.upperboundcomp is not None:

739

maxcomp = revlog.upperboundcomp

739

maxcomp = revlog.upperboundcomp

740

basenotsnap = (p1, p2, nullrev)

740

basenotsnap = (p1, p2, nullrev)

741

if rev not in basenotsnap and revlog.issnapshot(rev):

741

if rev not in basenotsnap and revlog.issnapshot(rev):

742

snapshotdepth = revlog.snapshotdepth(rev)

742

snapshotdepth = revlog.snapshotdepth(rev)

743

# If text is significantly larger than the base, we can

743

# If text is significantly larger than the base, we can

744

# expect the resulting delta to be proportional to the size

744

# expect the resulting delta to be proportional to the size

745

# difference

745

# difference

746

revsize = revlog.rawsize(rev)

746

revsize = revlog.rawsize(rev)

747

rawsizedistance = max(textlen - revsize, 0)

747

rawsizedistance = max(textlen - revsize, 0)

748

# use an estimate of the compression upper bound.

748

# use an estimate of the compression upper bound.

749

lowestrealisticdeltalen = rawsizedistance // maxcomp

749

lowestrealisticdeltalen = rawsizedistance // maxcomp

750

751

# check the absolute constraint on the delta size

751

# check the absolute constraint on the delta size

752

snapshotlimit = textlen >> snapshotdepth

752

snapshotlimit = textlen >> snapshotdepth

753

if snapshotlimit < lowestrealisticdeltalen:

753

if snapshotlimit < lowestrealisticdeltalen:

754

# delta lower bound is larger than accepted upper bound

754

# delta lower bound is larger than accepted upper bound

755

continue

755

continue

756

757

# check the relative constraint on the delta size

757

# check the relative constraint on the delta size

758

revlength = revlog.length(rev)

758

revlength = revlog.length(rev)

759

if revlength < lowestrealisticdeltalen:

759

if revlength < lowestrealisticdeltalen:

760

# delta probable lower bound is larger than target base

760

# delta probable lower bound is larger than target base

761

continue

761

continue

762

763

group.append(rev)

763

group.append(rev)

764

if group:

764

if group:

765

# XXX: in the sparse revlog case, group can become large,

765

# XXX: in the sparse revlog case, group can become large,

766

# impacting performances. Some bounding or slicing mecanism

766

# impacting performances. Some bounding or slicing mecanism

767

# would help to reduce this impact.

767

# would help to reduce this impact.

768

good = yield tuple(group)

768

good = yield tuple(group)

769

yield None

769

yield None

770

771

772

def _findsnapshots(revlog, cache, start_rev):

772

def _findsnapshots(revlog, cache, start_rev):

773

"""find snapshot from start_rev to tip"""

773

"""find snapshot from start_rev to tip"""

774

if util.safehasattr(revlog.index, b'findsnapshots'):

774

if util.safehasattr(revlog.index, b'findsnapshots'):

775

revlog.index.findsnapshots(cache, start_rev)

775

revlog.index.findsnapshots(cache, start_rev)

776

else:

776

else:

777

deltaparent = revlog.deltaparent

777

deltaparent = revlog.deltaparent

778

issnapshot = revlog.issnapshot

778

issnapshot = revlog.issnapshot

779

for rev in revlog.revs(start_rev):

779

for rev in revlog.revs(start_rev):

780

if issnapshot(rev):

780

if issnapshot(rev):

781

cache[deltaparent(rev)].append(rev)

781

cache[deltaparent(rev)].append(rev)

782

783

784

def _refinedgroups(revlog, p1, p2, cachedelta):

784

def _refinedgroups(revlog, p1, p2, cachedelta):

785

good = None

785

good = None

786

# First we try to reuse a the delta contained in the bundle.

786

# First we try to reuse a the delta contained in the bundle.

787

# (or from the source revlog)

787

# (or from the source revlog)

788

#

788

#

789

# This logic only applies to general delta repositories and can be disabled

789

# This logic only applies to general delta repositories and can be disabled

790

# through configuration. Disabling reuse source delta is useful when

790

# through configuration. Disabling reuse source delta is useful when

791

# we want to make sure we recomputed "optimal" deltas.

791

# we want to make sure we recomputed "optimal" deltas.

792

debug_info = None

792

debug_info = None

793

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

793

if cachedelta and revlog._generaldelta and revlog._lazydeltabase:

794

# Assume what we received from the server is a good choice

794

# Assume what we received from the server is a good choice

795

# build delta will reuse the cache

795

# build delta will reuse the cache

796

if debug_info is not None:

796

if debug_info is not None:

797

debug_info['cached-delta.tested'] += 1

797

debug_info['cached-delta.tested'] += 1

798

good = yield (cachedelta[0],)

798

good = yield (cachedelta[0],)

799

if good is not None:

799

if good is not None:

800

if debug_info is not None:

800

if debug_info is not None:

801

debug_info['cached-delta.accepted'] += 1

801

debug_info['cached-delta.accepted'] += 1

802

yield None

802

yield None

803

return

803

return

804

# XXX cache me higher

804

# XXX cache me higher

805

snapshots = collections.defaultdict(list)

805

snapshots = collections.defaultdict(list)

806

groups = _rawgroups(

806

groups = _rawgroups(

807

revlog,

807

revlog,

808

p1,

808

p1,

809

p2,

809

p2,

810

cachedelta,

810

cachedelta,

811

snapshots,

811

snapshots,

812

)

812

)

813

for candidates in groups:

813

for candidates in groups:

814

good = yield candidates

814

good = yield candidates

815

if good is not None:

815

if good is not None:

816

break

816

break

817

818

# If sparse revlog is enabled, we can try to refine the available deltas

818

# If sparse revlog is enabled, we can try to refine the available deltas

819

if not revlog._sparserevlog:

819

if not revlog._sparserevlog:

820

yield None

820

yield None

821

return

821

return

822

823

# if we have a refinable value, try to refine it

823

# if we have a refinable value, try to refine it

824

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

824

if good is not None and good not in (p1, p2) and revlog.issnapshot(good):

825

# refine snapshot down

825

# refine snapshot down

826

previous = None

826

previous = None

827

while previous != good:

827

while previous != good:

828

previous = good

828

previous = good

829

base = revlog.deltaparent(good)

829

base = revlog.deltaparent(good)

830

if base == nullrev:

830

if base == nullrev:

831

break

831

break

832

good = yield (base,)

832

good = yield (base,)

833

# refine snapshot up

833

# refine snapshot up

834

if not snapshots:

834

if not snapshots:

835

_findsnapshots(revlog, snapshots, good + 1)

835

_findsnapshots(revlog, snapshots, good + 1)

836

previous = None

836

previous = None

837

while good != previous:

837

while good != previous:

838

previous = good

838

previous = good

839

children = tuple(sorted(c for c in snapshots[good]))

839

children = tuple(sorted(c for c in snapshots[good]))

840

good = yield children

840

good = yield children

841

842

if debug_info is not None:

842

if debug_info is not None:

843

if good is None:

843

if good is None:

844

debug_info['no-solution'] += 1

844

debug_info['no-solution'] += 1

845

846

yield None

846

yield None

847

848

849

def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):

849

def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):

850

"""Provides group of revision to be tested as delta base

850

"""Provides group of revision to be tested as delta base

851

852

This lower level function focus on emitting delta theorically interresting

852

This lower level function focus on emitting delta theorically interresting

853

without looking it any practical details.

853

without looking it any practical details.

854

855

The group order aims at providing fast or small candidates first.

855

The group order aims at providing fast or small candidates first.

856

"""

856

"""

857

gdelta = revlog._generaldelta

857

gdelta = revlog._generaldelta

858

# gate sparse behind general-delta because of issue6056

858

# gate sparse behind general-delta because of issue6056

859

sparse = gdelta and revlog._sparserevlog

859

sparse = gdelta and revlog._sparserevlog

860

curr = len(revlog)

860

curr = len(revlog)

861

prev = curr - 1

861

prev = curr - 1

862

deltachain = lambda rev: revlog._deltachain(rev)[0]

862

deltachain = lambda rev: revlog._deltachain(rev)[0]

863

864

if gdelta:

864

if gdelta:

865

# exclude already lazy tested base if any

865

# exclude already lazy tested base if any

866

parents = [p for p in (p1, p2) if p != nullrev]

866

parents = [p for p in (p1, p2) if p != nullrev]

867

868

if not revlog._deltabothparents and len(parents) == 2:

868

if not revlog._deltabothparents and len(parents) == 2:

869

parents.sort()

869

parents.sort()

870

# To minimize the chance of having to build a fulltext,

870

# To minimize the chance of having to build a fulltext,

871

# pick first whichever parent is closest to us (max rev)

871

# pick first whichever parent is closest to us (max rev)

872

yield (parents[1],)

872

yield (parents[1],)

873

# then the other one (min rev) if the first did not fit

873

# then the other one (min rev) if the first did not fit

874

yield (parents[0],)

874

yield (parents[0],)

875

elif len(parents) > 0:

875

elif len(parents) > 0:

876

# Test all parents (1 or 2), and keep the best candidate

876

# Test all parents (1 or 2), and keep the best candidate

877

yield parents

877

yield parents

878

879

if sparse and parents:

879

if sparse and parents:

880

if snapshots is None:

880

if snapshots is None:

881

# map: base-rev: [snapshot-revs]

881

# map: base-rev: [snapshot-revs]

882

snapshots = collections.defaultdict(list)

882

snapshots = collections.defaultdict(list)

883

# See if we can use an existing snapshot in the parent chains to use as

883

# See if we can use an existing snapshot in the parent chains to use as

884

# a base for a new intermediate-snapshot

884

# a base for a new intermediate-snapshot

885

#

885

#

886

# search for snapshot in parents delta chain

886

# search for snapshot in parents delta chain

887

# map: snapshot-level: snapshot-rev

887

# map: snapshot-level: snapshot-rev

888

parents_snaps = collections.defaultdict(set)

888

parents_snaps = collections.defaultdict(set)

889

candidate_chains = [deltachain(p) for p in parents]

889

candidate_chains = [deltachain(p) for p in parents]

890

for chain in candidate_chains:

890

for chain in candidate_chains:

891

for idx, s in enumerate(chain):

891

for idx, s in enumerate(chain):

892

if not revlog.issnapshot(s):

892

if not revlog.issnapshot(s):

893

break

893

break

894

parents_snaps[idx].add(s)

894

parents_snaps[idx].add(s)

895

snapfloor = min(parents_snaps[0]) + 1

895

snapfloor = min(parents_snaps[0]) + 1

896

_findsnapshots(revlog, snapshots, snapfloor)

896

_findsnapshots(revlog, snapshots, snapfloor)

897

# search for the highest "unrelated" revision

897

# search for the highest "unrelated" revision

898

#

898

#

899

# Adding snapshots used by "unrelated" revision increase the odd we

899

# Adding snapshots used by "unrelated" revision increase the odd we

900

# reuse an independant, yet better snapshot chain.

900

# reuse an independant, yet better snapshot chain.

901

#

901

#

902

# XXX instead of building a set of revisions, we could lazily enumerate

902

# XXX instead of building a set of revisions, we could lazily enumerate

903

# over the chains. That would be more efficient, however we stick to

903

# over the chains. That would be more efficient, however we stick to

904

# simple code for now.

904

# simple code for now.

905

all_revs = set()

905

all_revs = set()

906

for chain in candidate_chains:

906

for chain in candidate_chains:

907

all_revs.update(chain)

907

all_revs.update(chain)

908

other = None

908

other = None

909

for r in revlog.revs(prev, snapfloor):

909

for r in revlog.revs(prev, snapfloor):

910

if r not in all_revs:

910

if r not in all_revs:

911

other = r

911

other = r

912

break

912

break

913

if other is not None:

913

if other is not None:

914

# To avoid unfair competition, we won't use unrelated intermediate

914

# To avoid unfair competition, we won't use unrelated intermediate

915

# snapshot that are deeper than the ones from the parent delta

915

# snapshot that are deeper than the ones from the parent delta

916

# chain.

916

# chain.

917

max_depth = max(parents_snaps.keys())

917

max_depth = max(parents_snaps.keys())

918

chain = deltachain(other)

918

chain = deltachain(other)

919

for ~~idx~~, s in enumerate(chain):

919

for depth, s in enumerate(chain):

920

if s < snapfloor:

920

if s < snapfloor:

921

continue

921

continue

922

if max_depth < ~~idx~~:

922

if max_depth < depth:

923

break

923

break

924

if not revlog.issnapshot(s):

924

if not revlog.issnapshot(s):

925

break

925

break

926

parents_snaps[~~idx~~].add(s)

926

parents_snaps[depth].add(s)

927

# Test them as possible intermediate snapshot base

927

# Test them as possible intermediate snapshot base

928

# We test them from highest to lowest level. High level one are more

928

# We test them from highest to lowest level. High level one are more

929

# likely to result in small delta

929

# likely to result in small delta

930

floor = None

930

floor = None

931

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

931

for idx, snaps in sorted(parents_snaps.items(), reverse=True):

932

siblings = set()

932

siblings = set()

933

for s in snaps:

933

for s in snaps:

934

siblings.update(snapshots[s])

934

siblings.update(snapshots[s])

935

# Before considering making a new intermediate snapshot, we check

935

# Before considering making a new intermediate snapshot, we check

936

# if an existing snapshot, children of base we consider, would be

936

# if an existing snapshot, children of base we consider, would be

937

# suitable.

937

# suitable.

938

#

938

#

939

# It give a change to reuse a delta chain "unrelated" to the

939

# It give a change to reuse a delta chain "unrelated" to the

940

# current revision instead of starting our own. Without such

940

# current revision instead of starting our own. Without such

941

# re-use, topological branches would keep reopening new chains.

941

# re-use, topological branches would keep reopening new chains.

942

# Creating more and more snapshot as the repository grow.

942

# Creating more and more snapshot as the repository grow.

943

944

if floor is not None:

944

if floor is not None:

945

# We only do this for siblings created after the one in our

945

# We only do this for siblings created after the one in our

946

# parent's delta chain. Those created before has less chances

946

# parent's delta chain. Those created before has less chances

947

# to be valid base since our ancestors had to create a new

947

# to be valid base since our ancestors had to create a new

948

# snapshot.

948

# snapshot.

949

siblings = [r for r in siblings if floor < r]

949

siblings = [r for r in siblings if floor < r]

950

yield tuple(sorted(siblings))

950

yield tuple(sorted(siblings))

951

# then test the base from our parent's delta chain.

951

# then test the base from our parent's delta chain.

952

yield tuple(sorted(snaps))

952

yield tuple(sorted(snaps))

953

floor = min(snaps)

953

floor = min(snaps)

954

# No suitable base found in the parent chain, search if any full

954

# No suitable base found in the parent chain, search if any full

955

# snapshots emitted since parent's base would be a suitable base for an

955

# snapshots emitted since parent's base would be a suitable base for an

956

# intermediate snapshot.

956

# intermediate snapshot.

957

#

957

#

958

# It give a chance to reuse a delta chain unrelated to the current

958

# It give a chance to reuse a delta chain unrelated to the current

959

# revisions instead of starting our own. Without such re-use,

959

# revisions instead of starting our own. Without such re-use,

960

# topological branches would keep reopening new full chains. Creating

960

# topological branches would keep reopening new full chains. Creating

961

# more and more snapshot as the repository grow.

961

# more and more snapshot as the repository grow.

962

yield tuple(snapshots[nullrev])

962

yield tuple(snapshots[nullrev])

963

964

if not sparse:

964

if not sparse:

965

# other approach failed try against prev to hopefully save us a

965

# other approach failed try against prev to hopefully save us a

966

# fulltext.

966

# fulltext.

967

yield (prev,)

967

yield (prev,)

968

969

970

class deltacomputer:

970

class deltacomputer:

971

def __init__(

971

def __init__(

972

self,

972

self,

973

revlog,

973

revlog,

974

write_debug=None,

974

write_debug=None,

975

debug_search=False,

975

debug_search=False,

976

debug_info=None,

976

debug_info=None,

977

):

977

):

978

self.revlog = revlog

978

self.revlog = revlog

979

self._write_debug = write_debug

979

self._write_debug = write_debug

980

self._debug_search = debug_search

980

self._debug_search = debug_search

981

self._debug_info = debug_info

981

self._debug_info = debug_info

982

983

def buildtext(self, revinfo, fh):

983

def buildtext(self, revinfo, fh):

984

"""Builds a fulltext version of a revision

984

"""Builds a fulltext version of a revision

985

986

revinfo: revisioninfo instance that contains all needed info

986

revinfo: revisioninfo instance that contains all needed info

987

fh: file handle to either the .i or the .d revlog file,

987

fh: file handle to either the .i or the .d revlog file,

988

depending on whether it is inlined or not

988

depending on whether it is inlined or not

989

"""

989

"""

990

btext = revinfo.btext

990

btext = revinfo.btext

991

if btext[0] is not None:

991

if btext[0] is not None:

992

return btext[0]

992

return btext[0]

993

994

revlog = self.revlog

994

revlog = self.revlog

995

cachedelta = revinfo.cachedelta

995

cachedelta = revinfo.cachedelta

996

baserev = cachedelta[0]

996

baserev = cachedelta[0]

997

delta = cachedelta[1]

997

delta = cachedelta[1]

998

999

fulltext = btext[0] = _textfromdelta(

999

fulltext = btext[0] = _textfromdelta(

1000

fh,

1000

fh,

1001

revlog,

1001

revlog,

1002

baserev,

1002

baserev,

1003

delta,

1003

delta,

1004

revinfo.p1,

1004

revinfo.p1,

1005

revinfo.p2,

1005

revinfo.p2,

1006

revinfo.flags,

1006

revinfo.flags,

1007

revinfo.node,

1007

revinfo.node,

1008

)

1008

)

1009

return fulltext

1009

return fulltext

1010

1011

def _builddeltadiff(self, base, revinfo, fh):

1011

def _builddeltadiff(self, base, revinfo, fh):

1012

revlog = self.revlog

1012

revlog = self.revlog

1013

t = self.buildtext(revinfo, fh)

1013

t = self.buildtext(revinfo, fh)

1014

if revlog.iscensored(base):

1014

if revlog.iscensored(base):

1015

# deltas based on a censored revision must replace the

1015

# deltas based on a censored revision must replace the

1016

# full content in one patch, so delta works everywhere

1016

# full content in one patch, so delta works everywhere

1017

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

1017

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

1018

delta = header + t

1018

delta = header + t

1019

else:

1019

else:

1020

ptext = revlog.rawdata(base, _df=fh)

1020

ptext = revlog.rawdata(base, _df=fh)

1021

delta = mdiff.textdiff(ptext, t)

1021

delta = mdiff.textdiff(ptext, t)

1022

1023

return delta

1023

return delta

1024

1025

def _builddeltainfo(self, revinfo, base, fh):

1025

def _builddeltainfo(self, revinfo, base, fh):

1026

# can we use the cached delta?

1026

# can we use the cached delta?

1027

revlog = self.revlog

1027

revlog = self.revlog

1028

debug_search = self._write_debug is not None and self._debug_search

1028

debug_search = self._write_debug is not None and self._debug_search

1029

chainbase = revlog.chainbase(base)

1029

chainbase = revlog.chainbase(base)

1030

if revlog._generaldelta:

1030

if revlog._generaldelta:

1031

deltabase = base

1031

deltabase = base

1032

else:

1032

else:

1033

deltabase = chainbase

1033

deltabase = chainbase

1034

snapshotdepth = None

1034

snapshotdepth = None

1035

if revlog._sparserevlog and deltabase == nullrev:

1035

if revlog._sparserevlog and deltabase == nullrev:

1036

snapshotdepth = 0

1036

snapshotdepth = 0

1037

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

1037

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

1038

# A delta chain should always be one full snapshot,

1038

# A delta chain should always be one full snapshot,

1039

# zero or more semi-snapshots, and zero or more deltas

1039

# zero or more semi-snapshots, and zero or more deltas

1040

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

1040

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

1041

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

1041

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

1042

snapshotdepth = len(revlog._deltachain(deltabase)[0])

1042

snapshotdepth = len(revlog._deltachain(deltabase)[0])

1043

delta = None

1043

delta = None

1044

if revinfo.cachedelta:

1044

if revinfo.cachedelta:

1045

cachebase, cachediff = revinfo.cachedelta

1045

cachebase, cachediff = revinfo.cachedelta

1046

# check if the diff still apply

1046

# check if the diff still apply

1047

currentbase = cachebase

1047

currentbase = cachebase

1048

while (

1048

while (

1049

currentbase != nullrev

1049

currentbase != nullrev

1050

and currentbase != base

1050

and currentbase != base

1051

and self.revlog.length(currentbase) == 0

1051

and self.revlog.length(currentbase) == 0

1052

):

1052

):

1053

currentbase = self.revlog.deltaparent(currentbase)

1053

currentbase = self.revlog.deltaparent(currentbase)

1054

if self.revlog._lazydelta and currentbase == base:

1054

if self.revlog._lazydelta and currentbase == base:

1055

delta = revinfo.cachedelta[1]

1055

delta = revinfo.cachedelta[1]

1056

if delta is None:

1056

if delta is None:

1057

delta = self._builddeltadiff(base, revinfo, fh)

1057

delta = self._builddeltadiff(base, revinfo, fh)

1058

if debug_search:

1058

if debug_search:

1059

msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"

1059

msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"

1060

msg %= len(delta)

1060

msg %= len(delta)

1061

self._write_debug(msg)

1061

self._write_debug(msg)

1062

# snapshotdept need to be neither None nor 0 level snapshot

1062

# snapshotdept need to be neither None nor 0 level snapshot

1063

if revlog.upperboundcomp is not None and snapshotdepth:

1063

if revlog.upperboundcomp is not None and snapshotdepth:

1064

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

1064

lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp

1065

snapshotlimit = revinfo.textlen >> snapshotdepth

1065

snapshotlimit = revinfo.textlen >> snapshotdepth

1066

if debug_search:

1066

if debug_search:

1067

msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"

1067

msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"

1068

msg %= lowestrealisticdeltalen

1068

msg %= lowestrealisticdeltalen

1069

self._write_debug(msg)

1069

self._write_debug(msg)

1070

if snapshotlimit < lowestrealisticdeltalen:

1070

if snapshotlimit < lowestrealisticdeltalen:

1071

if debug_search:

1071

if debug_search:

1072

msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"

1072

msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"

1073

self._write_debug(msg)

1073

self._write_debug(msg)

1074

return None

1074

return None

1075

if revlog.length(base) < lowestrealisticdeltalen:

1075

if revlog.length(base) < lowestrealisticdeltalen:

1076

if debug_search:

1076

if debug_search:

1077

msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"

1077

msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"

1078

self._write_debug(msg)

1078

self._write_debug(msg)

1079

return None

1079

return None

1080

header, data = revlog.compress(delta)

1080

header, data = revlog.compress(delta)

1081

deltalen = len(header) + len(data)

1081

deltalen = len(header) + len(data)

1082

offset = revlog.end(len(revlog) - 1)

1082

offset = revlog.end(len(revlog) - 1)

1083

dist = deltalen + offset - revlog.start(chainbase)

1083

dist = deltalen + offset - revlog.start(chainbase)

1084

chainlen, compresseddeltalen = revlog._chaininfo(base)

1084

chainlen, compresseddeltalen = revlog._chaininfo(base)

1085

chainlen += 1

1085

chainlen += 1

1086

compresseddeltalen += deltalen

1086

compresseddeltalen += deltalen

1087

1088

return _deltainfo(

1088

return _deltainfo(

1089

dist,

1089

dist,

1090

deltalen,

1090

deltalen,

1091

(header, data),

1091

(header, data),

1092

deltabase,

1092

deltabase,

1093

chainbase,

1093

chainbase,

1094

chainlen,

1094

chainlen,

1095

compresseddeltalen,

1095

compresseddeltalen,

1096

snapshotdepth,

1096

snapshotdepth,

1097

)

1097

)

1098

1099

def _fullsnapshotinfo(self, fh, revinfo, curr):

1099

def _fullsnapshotinfo(self, fh, revinfo, curr):

1100

rawtext = self.buildtext(revinfo, fh)

1100

rawtext = self.buildtext(revinfo, fh)

1101

data = self.revlog.compress(rawtext)

1101

data = self.revlog.compress(rawtext)

1102

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

1102

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

1103

deltabase = chainbase = curr

1103

deltabase = chainbase = curr

1104

snapshotdepth = 0

1104

snapshotdepth = 0

1105

chainlen = 1

1105

chainlen = 1

1106

1107

return _deltainfo(

1107

return _deltainfo(

1108

dist,

1108

dist,

1109

deltalen,

1109

deltalen,

1110

data,

1110

data,

1111

deltabase,

1111

deltabase,

1112

chainbase,

1112

chainbase,

1113

chainlen,

1113

chainlen,

1114

compresseddeltalen,

1114

compresseddeltalen,

1115

snapshotdepth,

1115

snapshotdepth,

1116

)

1116

)

1117

1118

def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):

1118

def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):

1119

"""Find an acceptable delta against a candidate revision

1119

"""Find an acceptable delta against a candidate revision

1120

1121

revinfo: information about the revision (instance of _revisioninfo)

1121

revinfo: information about the revision (instance of _revisioninfo)

1122

fh: file handle to either the .i or the .d revlog file,

1122

fh: file handle to either the .i or the .d revlog file,

1123

depending on whether it is inlined or not

1123

depending on whether it is inlined or not

1124

1125

Returns the first acceptable candidate revision, as ordered by

1125

Returns the first acceptable candidate revision, as ordered by

1126

_candidategroups

1126

_candidategroups

1127

1128

If no suitable deltabase is found, we return delta info for a full

1128

If no suitable deltabase is found, we return delta info for a full

1129

snapshot.

1129

snapshot.

1130

1131

`excluded_bases` is an optional set of revision that cannot be used as

1131

`excluded_bases` is an optional set of revision that cannot be used as

1132

a delta base. Use this to recompute delta suitable in censor or strip

1132

a delta base. Use this to recompute delta suitable in censor or strip

1133

context.

1133

context.

1134

"""

1134

"""

1135

if target_rev is None:

1135

if target_rev is None:

1136

target_rev = len(self.revlog)

1136

target_rev = len(self.revlog)

1137

1138

if not revinfo.textlen:

1138

if not revinfo.textlen:

1139

return self._fullsnapshotinfo(fh, revinfo, target_rev)

1139

return self._fullsnapshotinfo(fh, revinfo, target_rev)

1140

1141

if excluded_bases is None:

1141

if excluded_bases is None:

1142

excluded_bases = set()

1142

excluded_bases = set()

1143

1144

# no delta for flag processor revision (see "candelta" for why)

1144

# no delta for flag processor revision (see "candelta" for why)

1145

# not calling candelta since only one revision needs test, also to

1145

# not calling candelta since only one revision needs test, also to

1146

# avoid overhead fetching flags again.

1146

# avoid overhead fetching flags again.

1147

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1147

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

1148

return self._fullsnapshotinfo(fh, revinfo, target_rev)

1148

return self._fullsnapshotinfo(fh, revinfo, target_rev)

1149

1150

gather_debug = (

1150

gather_debug = (

1151

self._write_debug is not None or self._debug_info is not None

1151

self._write_debug is not None or self._debug_info is not None

1152

)

1152

)

1153

debug_search = self._write_debug is not None and self._debug_search

1153

debug_search = self._write_debug is not None and self._debug_search

1154

1155

if gather_debug:

1155

if gather_debug:

1156

start = util.timer()

1156

start = util.timer()

1157

1158

# count the number of different delta we tried (for debug purpose)

1158

# count the number of different delta we tried (for debug purpose)

1159

dbg_try_count = 0

1159

dbg_try_count = 0

1160

# count the number of "search round" we did. (for debug purpose)

1160

# count the number of "search round" we did. (for debug purpose)

1161

dbg_try_rounds = 0

1161

dbg_try_rounds = 0

1162

dbg_type = b'unknown'

1162

dbg_type = b'unknown'

1163

1164

cachedelta = revinfo.cachedelta

1164

cachedelta = revinfo.cachedelta

1165

p1 = revinfo.p1

1165

p1 = revinfo.p1

1166

p2 = revinfo.p2

1166

p2 = revinfo.p2

1167

revlog = self.revlog

1167

revlog = self.revlog

1168

1169

deltainfo = None

1169

deltainfo = None

1170

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

1170

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

1171

1172

if gather_debug:

1172

if gather_debug:

1173

if p1r != nullrev:

1173

if p1r != nullrev:

1174

p1_chain_len = revlog._chaininfo(p1r)[0]

1174

p1_chain_len = revlog._chaininfo(p1r)[0]

1175

else:

1175

else:

1176

p1_chain_len = -1

1176

p1_chain_len = -1

1177

if p2r != nullrev:

1177

if p2r != nullrev:

1178

p2_chain_len = revlog._chaininfo(p2r)[0]

1178

p2_chain_len = revlog._chaininfo(p2r)[0]

1179

else:

1179

else:

1180

p2_chain_len = -1

1180

p2_chain_len = -1

1181

if debug_search:

1181

if debug_search:

1182

msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"

1182

msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"

1183

msg %= target_rev

1183

msg %= target_rev

1184

self._write_debug(msg)

1184

self._write_debug(msg)

1185

1186

groups = _candidategroups(

1186

groups = _candidategroups(

1187

self.revlog,

1187

self.revlog,

1188

revinfo.textlen,

1188

revinfo.textlen,

1189

p1r,

1189

p1r,

1190

p2r,

1190

p2r,

1191

cachedelta,

1191

cachedelta,

1192

excluded_bases,

1192

excluded_bases,

1193

target_rev,

1193

target_rev,

1194

)

1194

)

1195

candidaterevs = next(groups)

1195

candidaterevs = next(groups)

1196

while candidaterevs is not None:

1196

while candidaterevs is not None:

1197

dbg_try_rounds += 1

1197

dbg_try_rounds += 1

1198

if debug_search:

1198

if debug_search:

1199

prev = None

1199

prev = None

1200

if deltainfo is not None:

1200

if deltainfo is not None:

1201

prev = deltainfo.base

1201

prev = deltainfo.base

1202

1203

if (

1203

if (

1204

cachedelta is not None

1204

cachedelta is not None

1205

and len(candidaterevs) == 1

1205

and len(candidaterevs) == 1

1206

and cachedelta[0] in candidaterevs

1206

and cachedelta[0] in candidaterevs

1207

):

1207

):

1208

round_type = b"cached-delta"

1208

round_type = b"cached-delta"

1209

elif p1 in candidaterevs or p2 in candidaterevs:

1209

elif p1 in candidaterevs or p2 in candidaterevs:

1210

round_type = b"parents"

1210

round_type = b"parents"

1211

elif prev is not None and all(c < prev for c in candidaterevs):

1211

elif prev is not None and all(c < prev for c in candidaterevs):

1212

round_type = b"refine-down"

1212

round_type = b"refine-down"

1213

elif prev is not None and all(c > prev for c in candidaterevs):

1213

elif prev is not None and all(c > prev for c in candidaterevs):

1214

round_type = b"refine-up"

1214

round_type = b"refine-up"

1215

else:

1215

else:

1216

round_type = b"search-down"

1216

round_type = b"search-down"

1217

msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"

1217

msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"

1218

msg %= (dbg_try_rounds, len(candidaterevs), round_type)

1218

msg %= (dbg_try_rounds, len(candidaterevs), round_type)

1219

self._write_debug(msg)

1219

self._write_debug(msg)

1220

nominateddeltas = []

1220

nominateddeltas = []

1221

if deltainfo is not None:

1221

if deltainfo is not None:

1222

if debug_search:

1222

if debug_search:

1223

msg = (

1223

msg = (

1224

b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"

1224

b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"

1225

)

1225

)

1226

msg %= (deltainfo.base, deltainfo.deltalen)

1226

msg %= (deltainfo.base, deltainfo.deltalen)

1227

self._write_debug(msg)

1227

self._write_debug(msg)

1228

# if we already found a good delta,

1228

# if we already found a good delta,

1229

# challenge it against refined candidates

1229

# challenge it against refined candidates

1230

nominateddeltas.append(deltainfo)

1230

nominateddeltas.append(deltainfo)

1231

for candidaterev in candidaterevs:

1231

for candidaterev in candidaterevs:

1232

if debug_search:

1232

if debug_search:

1233

msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"

1233

msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"

1234

msg %= candidaterev

1234

msg %= candidaterev

1235

self._write_debug(msg)

1235

self._write_debug(msg)

1236

candidate_type = None

1236

candidate_type = None

1237

if candidaterev == p1:

1237

if candidaterev == p1:

1238

candidate_type = b"p1"

1238

candidate_type = b"p1"

1239

elif candidaterev == p2:

1239

elif candidaterev == p2:

1240

candidate_type = b"p2"

1240

candidate_type = b"p2"

1241

elif self.revlog.issnapshot(candidaterev):

1241

elif self.revlog.issnapshot(candidaterev):

1242

candidate_type = b"snapshot-%d"

1242

candidate_type = b"snapshot-%d"

1243

candidate_type %= self.revlog.snapshotdepth(

1243

candidate_type %= self.revlog.snapshotdepth(

1244

candidaterev

1244

candidaterev

1245

)

1245

)

1246

1247

if candidate_type is not None:

1247

if candidate_type is not None:

1248

msg = b"DBG-DELTAS-SEARCH: type=%s\n"

1248

msg = b"DBG-DELTAS-SEARCH: type=%s\n"

1249

msg %= candidate_type

1249

msg %= candidate_type

1250

self._write_debug(msg)

1250

self._write_debug(msg)

1251

msg = b"DBG-DELTAS-SEARCH: size=%d\n"

1251

msg = b"DBG-DELTAS-SEARCH: size=%d\n"

1252

msg %= self.revlog.length(candidaterev)

1252

msg %= self.revlog.length(candidaterev)

1253

self._write_debug(msg)

1253

self._write_debug(msg)

1254

msg = b"DBG-DELTAS-SEARCH: base=%d\n"

1254

msg = b"DBG-DELTAS-SEARCH: base=%d\n"

1255

msg %= self.revlog.deltaparent(candidaterev)

1255

msg %= self.revlog.deltaparent(candidaterev)

1256

self._write_debug(msg)

1256

self._write_debug(msg)

1257

1258

dbg_try_count += 1

1258

dbg_try_count += 1

1259

1260

if debug_search:

1260

if debug_search:

1261

delta_start = util.timer()

1261

delta_start = util.timer()

1262

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

1262

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

1263

if debug_search:

1263

if debug_search:

1264

delta_end = util.timer()

1264

delta_end = util.timer()

1265

msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"

1265

msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"

1266

msg %= delta_end - delta_start

1266

msg %= delta_end - delta_start

1267

self._write_debug(msg)

1267

self._write_debug(msg)

1268

if candidatedelta is not None:

1268

if candidatedelta is not None:

1269

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

1269

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

1270

if debug_search:

1270

if debug_search:

1271

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"

1271

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"

1272

msg %= candidatedelta.deltalen

1272

msg %= candidatedelta.deltalen

1273

self._write_debug(msg)

1273

self._write_debug(msg)

1274

nominateddeltas.append(candidatedelta)

1274

nominateddeltas.append(candidatedelta)

1275

elif debug_search:

1275

elif debug_search:

1276

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"

1276

msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"

1277

msg %= candidatedelta.deltalen

1277

msg %= candidatedelta.deltalen

1278

self._write_debug(msg)

1278

self._write_debug(msg)

1279

elif debug_search:

1279

elif debug_search:

1280

msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"

1280

msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"

1281

self._write_debug(msg)

1281

self._write_debug(msg)

1282

if nominateddeltas:

1282

if nominateddeltas:

1283

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1283

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

1284

if deltainfo is not None:

1284

if deltainfo is not None:

1285

candidaterevs = groups.send(deltainfo.base)

1285

candidaterevs = groups.send(deltainfo.base)

1286

else:

1286

else:

1287

candidaterevs = next(groups)

1287

candidaterevs = next(groups)

1288

1289

if deltainfo is None:

1289

if deltainfo is None:

1290

dbg_type = b"full"

1290

dbg_type = b"full"

1291

deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)

1291

deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)

1292

elif deltainfo.snapshotdepth: # pytype: disable=attribute-error

1292

elif deltainfo.snapshotdepth: # pytype: disable=attribute-error

1293

dbg_type = b"snapshot"

1293

dbg_type = b"snapshot"

1294

else:

1294

else:

1295

dbg_type = b"delta"

1295

dbg_type = b"delta"

1296

1297

if gather_debug:

1297

if gather_debug:

1298

end = util.timer()

1298

end = util.timer()

1299

used_cached = (

1299

used_cached = (

1300

cachedelta is not None

1300

cachedelta is not None

1301

and dbg_try_rounds == 1

1301

and dbg_try_rounds == 1

1302

and dbg_try_count == 1

1302

and dbg_try_count == 1

1303

and deltainfo.base == cachedelta[0]

1303

and deltainfo.base == cachedelta[0]

1304

)

1304

)

1305

dbg = {

1305

dbg = {

1306

'duration': end - start,

1306

'duration': end - start,

1307

'revision': target_rev,

1307

'revision': target_rev,

1308

'delta-base': deltainfo.base, # pytype: disable=attribute-error

1308

'delta-base': deltainfo.base, # pytype: disable=attribute-error

1309

'search_round_count': dbg_try_rounds,

1309

'search_round_count': dbg_try_rounds,

1310

'using-cached-base': used_cached,

1310

'using-cached-base': used_cached,

1311

'delta_try_count': dbg_try_count,

1311

'delta_try_count': dbg_try_count,

1312

'type': dbg_type,

1312

'type': dbg_type,

1313

'p1-chain-len': p1_chain_len,

1313

'p1-chain-len': p1_chain_len,

1314

'p2-chain-len': p2_chain_len,

1314

'p2-chain-len': p2_chain_len,

1315

}

1315

}

1316

if (

1316

if (

1317

deltainfo.snapshotdepth # pytype: disable=attribute-error

1317

deltainfo.snapshotdepth # pytype: disable=attribute-error

1318

is not None

1318

is not None

1319

):

1319

):

1320

dbg[

1320

dbg[

1321

'snapshot-depth'

1321

'snapshot-depth'

1322

] = deltainfo.snapshotdepth # pytype: disable=attribute-error

1322

] = deltainfo.snapshotdepth # pytype: disable=attribute-error

1323

else:

1323

else:

1324

dbg['snapshot-depth'] = 0

1324

dbg['snapshot-depth'] = 0

1325

target_revlog = b"UNKNOWN"

1325

target_revlog = b"UNKNOWN"

1326

target_type = self.revlog.target[0]

1326

target_type = self.revlog.target[0]

1327

target_key = self.revlog.target[1]

1327

target_key = self.revlog.target[1]

1328

if target_type == KIND_CHANGELOG:

1328

if target_type == KIND_CHANGELOG:

1329

target_revlog = b'CHANGELOG:'

1329

target_revlog = b'CHANGELOG:'

1330

elif target_type == KIND_MANIFESTLOG:

1330

elif target_type == KIND_MANIFESTLOG:

1331

target_revlog = b'MANIFESTLOG:'

1331

target_revlog = b'MANIFESTLOG:'

1332

if target_key:

1332

if target_key:

1333

target_revlog += b'%s:' % target_key

1333

target_revlog += b'%s:' % target_key

1334

elif target_type == KIND_FILELOG:

1334

elif target_type == KIND_FILELOG:

1335

target_revlog = b'FILELOG:'

1335

target_revlog = b'FILELOG:'

1336

if target_key:

1336

if target_key:

1337

target_revlog += b'%s:' % target_key

1337

target_revlog += b'%s:' % target_key

1338

dbg['target-revlog'] = target_revlog

1338

dbg['target-revlog'] = target_revlog

1339

1340

if self._debug_info is not None:

1340

if self._debug_info is not None:

1341

self._debug_info.append(dbg)

1341

self._debug_info.append(dbg)

1342

1343

if self._write_debug is not None:

1343

if self._write_debug is not None:

1344

msg = (

1344

msg = (

1345

b"DBG-DELTAS:"

1345

b"DBG-DELTAS:"

1346

b" %-12s"

1346

b" %-12s"

1347

b" rev=%d:"

1347

b" rev=%d:"

1348

b" delta-base=%d"

1348

b" delta-base=%d"

1349

b" is-cached=%d"

1349

b" is-cached=%d"

1350

b" - search-rounds=%d"

1350

b" - search-rounds=%d"

1351

b" try-count=%d"

1351

b" try-count=%d"

1352

b" - delta-type=%-6s"

1352

b" - delta-type=%-6s"

1353

b" snap-depth=%d"

1353

b" snap-depth=%d"

1354

b" - p1-chain-length=%d"

1354

b" - p1-chain-length=%d"

1355

b" p2-chain-length=%d"

1355

b" p2-chain-length=%d"

1356

b" - duration=%f"

1356

b" - duration=%f"

1357

b"\n"

1357

b"\n"

1358

)

1358

)

1359

msg %= (

1359

msg %= (

1360

dbg["target-revlog"],

1360

dbg["target-revlog"],

1361

dbg["revision"],

1361

dbg["revision"],

1362

dbg["delta-base"],

1362

dbg["delta-base"],

1363

dbg["using-cached-base"],

1363

dbg["using-cached-base"],

1364

dbg["search_round_count"],

1364

dbg["search_round_count"],

1365

dbg["delta_try_count"],

1365

dbg["delta_try_count"],

1366

dbg["type"],

1366

dbg["type"],

1367

dbg["snapshot-depth"],

1367

dbg["snapshot-depth"],

1368

dbg["p1-chain-len"],

1368

dbg["p1-chain-len"],

1369

dbg["p2-chain-len"],

1369

dbg["p2-chain-len"],

1370

dbg["duration"],

1370

dbg["duration"],

1371

)

1371

)

1372

self._write_debug(msg)

1372

self._write_debug(msg)

1373

return deltainfo

1373

return deltainfo

1374

1375

1376

def delta_compression(default_compression_header, deltainfo):

1376

def delta_compression(default_compression_header, deltainfo):

1377

"""return (COMPRESSION_MODE, deltainfo)

1377

"""return (COMPRESSION_MODE, deltainfo)

1378

1379

used by revlog v2+ format to dispatch between PLAIN and DEFAULT

1379

used by revlog v2+ format to dispatch between PLAIN and DEFAULT

1380

compression.

1380

compression.

1381

"""

1381

"""

1382

h, d = deltainfo.data

1382

h, d = deltainfo.data

1383

compression_mode = COMP_MODE_INLINE

1383

compression_mode = COMP_MODE_INLINE

1384

if not h and not d:

1384

if not h and not d:

1385

# not data to store at all... declare them uncompressed

1385

# not data to store at all... declare them uncompressed

1386

compression_mode = COMP_MODE_PLAIN

1386

compression_mode = COMP_MODE_PLAIN

1387

elif not h:

1387

elif not h:

1388

t = d[0:1]

1388

t = d[0:1]

1389

if t == b'\0':

1389

if t == b'\0':

1390

compression_mode = COMP_MODE_PLAIN

1390

compression_mode = COMP_MODE_PLAIN

1391

elif t == default_compression_header:

1391

elif t == default_compression_header:

1392

compression_mode = COMP_MODE_DEFAULT

1392

compression_mode = COMP_MODE_DEFAULT

1393

elif h == b'u':

1393

elif h == b'u':

1394

# we have a more efficient way to declare uncompressed

1394

# we have a more efficient way to declare uncompressed

1395

h = b''

1395

h = b''

1396

compression_mode = COMP_MODE_PLAIN

1396

compression_mode = COMP_MODE_PLAIN

1397

deltainfo = drop_u_compression(deltainfo)

1397

deltainfo = drop_u_compression(deltainfo)

1398

return compression_mode, deltainfo

1398

return compression_mode, deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             import collections
             import struct
             # import stuff from node for others to import from revlog
             from ..node import nullrev
             from ..i18n import _
             from ..pycompat import getattr
             from .constants import (
                 COMP_MODE_DEFAULT,
                 COMP_MODE_INLINE,
                 COMP_MODE_PLAIN,
                 KIND_CHANGELOG,
                 KIND_FILELOG,
                 KIND_MANIFESTLOG,
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import attr
             from .. import (
                 error,
                 mdiff,
                 util,
             )
             from . import flagutil
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog:
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                     self._snapshot = set(snapshot)
                     self.index = None
                 def start(self, rev):
                     if rev == nullrev:
                         return 0
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     if rev == nullrev:
                         return 0
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
                 def issnapshot(self, rev):
                     if rev == nullrev:
                         return True
                     return rev in self._snapshot
             def slicechunk(revlog, revs, targetsize=None):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
                 `revlog._srmingapsize` is skipped.
                 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
                 For consistency with other slicing choice, this limit won't go lower than
                 `revlog._srmingapsize`.
                 If individual revisions chunk are larger than this limit, they will still
                 be raised individually.
                 >>> data = [
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ]
                 >>> revlog = _testrevlog(data, snapshot=range(16))
                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 Slicing with a maximum chunk size
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 Slicing involving nullrev
                 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
                 [[-1, 0], [11], [13, 15]]
                 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
                 [[-1], [13], [15]]
                 """
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)
                 # targetsize should not be specified when evaluating delta candidates:
                 # * targetsize is used to ensure we stay within specification when reading,
                 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
                 if densityslicing is None:
                     densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
                 for chunk in densityslicing(
                     revs, revlog._srdensitythreshold, revlog._srmingapsize
                 ):
                     for subchunk in _slicechunktosize(revlog, chunk, targetsize):
                         yield subchunk
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """slice revs to match the target size
                 This is intended to be used on chunk that density slicing selected by that
                 are still too large compared to the read garantee of revlog. This might
                 happens when "minimal gap size" interrupted the slicing or when chain are
                 built in a way that create large blocks next to each other.
                 >>> data = [
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ]
                 == All snapshots cases ==
                 >>> revlog = _testrevlog(data, snapshot=range(9))
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == No Snapshot cases ==
                 >>> revlog = _testrevlog(data)
                 Cases where chunk is already small enough
                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]
                 cases where we need actual slicing
                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1], [2, 3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [4, 5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]
                 Case with too large individual chunk (must return valid chunk)
                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 == mixed case ==
                 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
                 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
                 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 startdata = revlog.start(revs[0])
                 enddata = revlog.end(revs[-1])
                 fullspan = enddata - startdata
                 if targetsize is None or fullspan <= targetsize:
                     yield revs
                     return
                 startrevidx = 0
                 endrevidx = 1
                 iterrevs = enumerate(revs)
                 next(iterrevs)  # skip first rev.
                 # first step: get snapshots out of the way
                 for idx, r in iterrevs:
                     span = revlog.end(r) - startdata
                     snapshot = revlog.issnapshot(r)
                     if span <= targetsize and snapshot:
                         endrevidx = idx + 1
                     else:
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                         endrevidx = idx + 1
                     if not snapshot:
                         break
                 # for the others, we use binary slicing to quickly converge toward valid
                 # chunks (otherwise, we might end up looking for start/end of many
                 # revisions). This logic is not looking for the perfect slicing point, it
                 # focuses on quickly converging toward valid chunks.
                 nbitem = len(revs)
                 while (enddata - startdata) > targetsize:
                     endrevidx = nbitem
                     if nbitem - startrevidx <= 1:
                         break  # protect against individual chunk larger than limit
                     localenddata = revlog.end(revs[endrevidx - 1])
                     span = localenddata - startdata
                     while span > targetsize:
                         if endrevidx - startrevidx <= 1:
                             break  # protect against individual chunk larger than limit
                         endrevidx -= (endrevidx - startrevidx) // 2
                         localenddata = revlog.end(revs[endrevidx - 1])
                         span = localenddata - startdata
                     chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
                     if chunk:
                         yield chunk
                     startrevidx = endrevidx
                     startdata = revlog.start(revs[startrevidx])
                 chunk = _trimchunk(revlog, revs, startrevidx)
                 if chunk:
                     yield chunk
             def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 deltachainspan = segmentspan(revlog, revs)
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 chainpayload = sum(length(r) for r in revs)
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gaps = []
                 prevend = None
                 for i, rev in enumerate(revs):
                     revstart = start(rev)
                     revlen = length(rev)
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             gaps.append((gapsize, i))
                     prevend = revstart + revlen
                 # sort the gaps to pop them from largest to small
                 gaps.sort()
                 # Collect the indices of the largest holes until the density is acceptable
                 selected = []
                 while gaps and density < targetdensity:
                     gapsize, gapidx = gaps.pop()
                     selected.append(gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= gapsize
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 selected.sort()
                 # Cut the revs at collected indices
                 previdx = 0
                 for idx in selected:
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (
                         endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
                     ):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs):
                 """Get the byte span of a segment of revisions
                 revs is a sorted array of revision numbers
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])
                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 >>> segmentspan(revlog, [0, 4])
                 >>> segmentspan(revlog, [3, 4])
                 >>> segmentspan(revlog, [1, 2, 3,])
                 >>> segmentspan(revlog, [1, 3])
                 """
                 if not revs:
                     return 0
                 end = revlog.end(revs[-1])
                 return end - revlog.start(revs[0])
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """build full text from a (base, delta) pair and other metadata"""
                 # special case deltas which replace entire base; no need to decode
                 # base revision. this neatly avoids censored bases, which throw when
                 # they're decoded.
                 hlen = struct.calcsize(b">lll")
                 if delta[:hlen] == mdiff.replacediffheader(
                     revlog.rawsize(baserev), len(delta) - hlen
                 ):
                     fulltext = delta[hlen:]
                 else:
                     # deltabase is rawtext before changed by flag processors, which is
                     # equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh)
                     fulltext = mdiff.patch(basetext, delta)
                 try:
                     validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise error.StorageError(
                             _(b'node %s is not censored') % expectednode
                         )
                 except error.CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo:
                 distance = attr.ib()
                 deltalen = attr.ib()
                 data = attr.ib()
                 base = attr.ib()
                 chainbase = attr.ib()
                 chainlen = attr.ib()
                 compresseddeltalen = attr.ib()
                 snapshotdepth = attr.ib()
             def drop_u_compression(delta):
                 """turn into a "u" (no-compression) into no-compression without header
                 This is useful for revlog format that has better compression method.
                 """
                 assert delta.data[0] == b'u', delta.data[0]
                 return _deltainfo(
                     delta.distance,
                     delta.deltalen - 1,
                     (b'', delta.data[1]),
                     delta.base,
                     delta.chainbase,
                     delta.chainlen,
                     delta.compresseddeltalen,
                     delta.snapshotdepth,
                 )
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant."""
                 if deltainfo is None:
                     return False
                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.
                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = deltainfo.distance  # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 #
                 #   In the sparse-revlog case, we rely on the associated "sparse reading"
                 #   to avoid issue related to the span of data. In theory, it would be
                 #   possible to build pathological revlog where delta pattern would lead
                 #   to too many reads. However, they do not happen in practice at all. So
                 #   we skip the span check entirely.
                 if not revlog._sparserevlog and maxdist < deltainfo.distance:
                     return False
                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False
                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False
                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
                     return False
                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (
                     deltainfo.snapshotdepth is not None
                     and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
                 ):
                     return False
                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (
                     deltainfo.snapshotdepth
                     and revlog.length(deltainfo.base) < deltainfo.deltalen
                 ):
                     return False
                 return True
             # If a revision's full text is that much bigger than a base candidate full
             # text's, it is very unlikely that it will produce a valid delta. We no longer
             # consider these candidates.
             LIMIT_BASE2TEXT = 500
             def _candidategroups(
                 revlog,
                 textlen,
                 p1,
                 p2,
                 cachedelta,
                 excluded_bases=None,
                 target_rev=None,
             ):
                 """Provides group of revision to be tested as delta base
                 This top level function focus on emitting groups with unique and worthwhile
                 content. See _raw_candidate_groups for details about the group order.
                 """
                 # should we try to build a delta?
                 if not (len(revlog) and revlog._storedeltachains):
                     yield None
                     return
                 deltalength = revlog.length
                 deltaparent = revlog.deltaparent
                 sparse = revlog._sparserevlog
                 good = None
                 deltas_limit = textlen * LIMIT_DELTA2TEXT
                 tested = {nullrev}
                 candidates = _refinedgroups(
                     revlog,
                     p1,
                     p2,
                     cachedelta,
                 )
                 while True:
                     temptative = candidates.send(good)
                     if temptative is None:
                         break
                     group = []
                     for rev in temptative:
                         # skip over empty delta (no need to include them in a chain)
                         while revlog._generaldelta and not (
                             rev == nullrev or rev in tested or deltalength(rev)
                         ):
                             tested.add(rev)
                             rev = deltaparent(rev)
                         # no need to try a delta against nullrev, this will be done as a
                         # last resort.
                         if rev == nullrev:
                             continue
                         # filter out revision we tested already
                         if rev in tested:
                             continue
                         tested.add(rev)
                         # an higher authority deamed the base unworthy (e.g. censored)
                         if excluded_bases is not None and rev in excluded_bases:
                             continue
                         # We are in some recomputation cases and that rev is too high in
                         # the revlog
                         if target_rev is not None and rev >= target_rev:
                             continue
                         # filter out delta base that will never produce good delta
                         if deltas_limit < revlog.length(rev):
                             continue
                         if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                             continue
                         # no delta for rawtext-changing revs (see "candelta" for why)
                         if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                             continue
                         # If we reach here, we are about to build and test a delta.
                         # The delta building process will compute the chaininfo in all
                         # case, since that computation is cached, it is fine to access it
                         # here too.
                         chainlen, chainsize = revlog._chaininfo(rev)
                         # if chain will be too long, skip base
                         if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                             continue
                         # if chain already have too much data, skip base
                         if deltas_limit < chainsize:
                             continue
                         if sparse and revlog.upperboundcomp is not None:
                             maxcomp = revlog.upperboundcomp
                             basenotsnap = (p1, p2, nullrev)
                             if rev not in basenotsnap and revlog.issnapshot(rev):
                                 snapshotdepth = revlog.snapshotdepth(rev)
                                 # If text is significantly larger than the base, we can
                                 # expect the resulting delta to be proportional to the size
                                 # difference
                                 revsize = revlog.rawsize(rev)
                                 rawsizedistance = max(textlen - revsize, 0)
                                 # use an estimate of the compression upper bound.
                                 lowestrealisticdeltalen = rawsizedistance // maxcomp
                                 # check the absolute constraint on the delta size
                                 snapshotlimit = textlen >> snapshotdepth
                                 if snapshotlimit < lowestrealisticdeltalen:
                                     # delta lower bound is larger than accepted upper bound
                                     continue
                                 # check the relative constraint on the delta size
                                 revlength = revlog.length(rev)
                                 if revlength < lowestrealisticdeltalen:
                                     # delta probable lower bound is larger than target base
                                     continue
                         group.append(rev)
                     if group:
                         # XXX: in the sparse revlog case, group can become large,
                         #      impacting performances. Some bounding or slicing mecanism
                         #      would help to reduce this impact.
                         good = yield tuple(group)
                 yield None
             def _findsnapshots(revlog, cache, start_rev):
                 """find snapshot from start_rev to tip"""
                 if util.safehasattr(revlog.index, b'findsnapshots'):
                     revlog.index.findsnapshots(cache, start_rev)
                 else:
                     deltaparent = revlog.deltaparent
                     issnapshot = revlog.issnapshot
                     for rev in revlog.revs(start_rev):
                         if issnapshot(rev):
                             cache[deltaparent(rev)].append(rev)
             def _refinedgroups(revlog, p1, p2, cachedelta):
                 good = None
                 # First we try to reuse a the delta contained in the bundle.
                 # (or from the source revlog)
                 #
                 # This logic only applies to general delta repositories and can be disabled
                 # through configuration. Disabling reuse source delta is useful when
                 # we want to make sure we recomputed "optimal" deltas.
                 debug_info = None
                 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
                     # Assume what we received from the server is a good choice
                     # build delta will reuse the cache
                     if debug_info is not None:
                         debug_info['cached-delta.tested'] += 1
                     good = yield (cachedelta[0],)
                     if good is not None:
                         if debug_info is not None:
                             debug_info['cached-delta.accepted'] += 1
                         yield None
                         return
                 # XXX cache me higher
                 snapshots = collections.defaultdict(list)
                 groups = _rawgroups(
                     revlog,
                     p1,
                     p2,
                     cachedelta,
                     snapshots,
                 )
                 for candidates in groups:
                     good = yield candidates
                     if good is not None:
                         break
                 # If sparse revlog is enabled, we can try to refine the available deltas
                 if not revlog._sparserevlog:
                     yield None
                     return
                 # if we have a refinable value, try to refine it
                 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
                     # refine snapshot down
                     previous = None
                     while previous != good:
                         previous = good
                         base = revlog.deltaparent(good)
                         if base == nullrev:
                             break
                         good = yield (base,)
                     # refine snapshot up
                     if not snapshots:
                         _findsnapshots(revlog, snapshots, good + 1)
                     previous = None
                     while good != previous:
                         previous = good
                         children = tuple(sorted(c for c in snapshots[good]))
                         good = yield children
                 if debug_info is not None:
                     if good is None:
                         debug_info['no-solution'] += 1
                 yield None
             def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
                 """Provides group of revision to be tested as delta base
                 This lower level function focus on emitting delta theorically interresting
                 without looking it any practical details.
                 The group order aims at providing fast or small candidates first.
                 """
                 gdelta = revlog._generaldelta
                 # gate sparse behind general-delta because of issue6056
                 sparse = gdelta and revlog._sparserevlog
                 curr = len(revlog)
                 prev = curr - 1
                 deltachain = lambda rev: revlog._deltachain(rev)[0]
                 if gdelta:
                     # exclude already lazy tested base if any
                     parents = [p for p in (p1, p2) if p != nullrev]
                     if not revlog._deltabothparents and len(parents) == 2:
                         parents.sort()
                         # To minimize the chance of having to build a fulltext,
                         # pick first whichever parent is closest to us (max rev)
                         yield (parents[1],)
                         # then the other one (min rev) if the first did not fit
                         yield (parents[0],)
                     elif len(parents) > 0:
                         # Test all parents (1 or 2), and keep the best candidate
                         yield parents
                 if sparse and parents:
                     if snapshots is None:
                         # map: base-rev: [snapshot-revs]
                         snapshots = collections.defaultdict(list)
                     # See if we can use an existing snapshot in the parent chains to use as
                     # a base for a new intermediate-snapshot
                     #
                     # search for snapshot in parents delta chain
                     # map: snapshot-level: snapshot-rev
                     parents_snaps = collections.defaultdict(set)
                     candidate_chains = [deltachain(p) for p in parents]
                     for chain in candidate_chains:
                         for idx, s in enumerate(chain):
                             if not revlog.issnapshot(s):
                                 break
                             parents_snaps[idx].add(s)
                     snapfloor = min(parents_snaps[0]) + 1
                     _findsnapshots(revlog, snapshots, snapfloor)
                     # search for the highest "unrelated" revision
                     #
                     # Adding snapshots used by "unrelated" revision increase the odd we
                     # reuse an independant, yet better snapshot chain.
                     #
                     # XXX instead of building a set of revisions, we could lazily enumerate
                     # over the chains. That would be more efficient, however we stick to
                     # simple code for now.
                     all_revs = set()
                     for chain in candidate_chains:
                         all_revs.update(chain)
                     other = None
                     for r in revlog.revs(prev, snapfloor):
                         if r not in all_revs:
                             other = r
                             break
                     if other is not None:
                         # To avoid unfair competition, we won't use unrelated intermediate
                         # snapshot that are deeper than the ones from the parent delta
                         # chain.
                         max_depth = max(parents_snaps.keys())
                         chain = deltachain(other)
-                        for idx, s in enumerate(chain):
+                        for depth, s in enumerate(chain):
                             if s < snapfloor:
                                 continue
-                            if max_depth < idx:
+                            if max_depth < depth:
                                 break
                             if not revlog.issnapshot(s):
                                 break
-                            parents_snaps[idx].add(s)
+                            parents_snaps[depth].add(s)
                     # Test them as possible intermediate snapshot base
                     # We test them from highest to lowest level. High level one are more
                     # likely to result in small delta
                     floor = None
                     for idx, snaps in sorted(parents_snaps.items(), reverse=True):
                         siblings = set()
                         for s in snaps:
                             siblings.update(snapshots[s])
                         # Before considering making a new intermediate snapshot, we check
                         # if an existing snapshot, children of base we consider, would be
                         # suitable.
                         #
                         # It give a change to reuse a delta chain "unrelated" to the
                         # current revision instead of starting our own. Without such
                         # re-use, topological branches would keep reopening new chains.
                         # Creating more and more snapshot as the repository grow.
                         if floor is not None:
                             # We only do this for siblings created after the one in our
                             # parent's delta chain. Those created before has less chances
                             # to be valid base since our ancestors had to create a new
                             # snapshot.
                             siblings = [r for r in siblings if floor < r]
                         yield tuple(sorted(siblings))
                         # then test the base from our parent's delta chain.
                         yield tuple(sorted(snaps))
                         floor = min(snaps)
                     # No suitable base found in the parent chain, search if any full
                     # snapshots emitted since parent's base would be a suitable base for an
                     # intermediate snapshot.
                     #
                     # It give a chance to reuse a delta chain unrelated to the current
                     # revisions instead of starting our own. Without such re-use,
                     # topological branches would keep reopening new full chains. Creating
                     # more and more snapshot as the repository grow.
                     yield tuple(snapshots[nullrev])
                 if not sparse:
                     # other approach failed try against prev to hopefully save us a
                     # fulltext.
                     yield (prev,)
             class deltacomputer:
                 def __init__(
                     self,
                     revlog,
                     write_debug=None,
                     debug_search=False,
                     debug_info=None,
                 ):
                     self.revlog = revlog
                     self._write_debug = write_debug
                     self._debug_search = debug_search
                     self._debug_info = debug_info
                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision
                     revinfo: revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]
                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]
                     fulltext = btext[0] = _textfromdelta(
                         fh,
                         revlog,
                         baserev,
                         delta,
                         revinfo.p1,
                         revinfo.p2,
                         revinfo.flags,
                         revinfo.node,
                     )
                     return fulltext
                 def _builddeltadiff(self, base, revinfo, fh):
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.rawdata(base, _df=fh)
                         delta = mdiff.textdiff(ptext, t)
                     return delta
                 def _builddeltainfo(self, revinfo, base, fh):
                     # can we use the cached delta?
                     revlog = self.revlog
                     debug_search = self._write_debug is not None and self._debug_search
                     chainbase = revlog.chainbase(base)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     snapshotdepth = None
                     if revlog._sparserevlog and deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])
                     delta = None
                     if revinfo.cachedelta:
                         cachebase, cachediff = revinfo.cachedelta
                         # check if the diff still apply
                         currentbase = cachebase
                         while (
                             currentbase != nullrev
                             and currentbase != base
                             and self.revlog.length(currentbase) == 0
                         ):
                             currentbase = self.revlog.deltaparent(currentbase)
                         if self.revlog._lazydelta and currentbase == base:
                             delta = revinfo.cachedelta[1]
                     if delta is None:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     if debug_search:
                         msg = b"DBG-DELTAS-SEARCH:     uncompressed-delta-size=%d\n"
                         msg %= len(delta)
                         self._write_debug(msg)
                     # snapshotdept need to be neither None nor 0 level snapshot
                     if revlog.upperboundcomp is not None and snapshotdepth:
                         lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
                         snapshotlimit = revinfo.textlen >> snapshotdepth
                         if debug_search:
                             msg = b"DBG-DELTAS-SEARCH:     projected-lower-size=%d\n"
                             msg %= lowestrealisticdeltalen
                             self._write_debug(msg)
                         if snapshotlimit < lowestrealisticdeltalen:
                             if debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:     DISCARDED (snapshot limit)\n"
                                 self._write_debug(msg)
                             return None
                         if revlog.length(base) < lowestrealisticdeltalen:
                             if debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:     DISCARDED (prev size)\n"
                                 self._write_debug(msg)
                             return None
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen
                     return _deltainfo(
                         dist,
                         deltalen,
                         (header, data),
                         deltabase,
                         chainbase,
                         chainlen,
                         compresseddeltalen,
                         snapshotdepth,
                     )
                 def _fullsnapshotinfo(self, fh, revinfo, curr):
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1
                     return _deltainfo(
                         dist,
                         deltalen,
                         data,
                         deltabase,
                         chainbase,
                         chainlen,
                         compresseddeltalen,
                         snapshotdepth,
                     )
                 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
                     """Find an acceptable delta against a candidate revision
                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not
                     Returns the first acceptable candidate revision, as ordered by
                     _candidategroups
                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     `excluded_bases` is an optional set of revision that cannot be used as
                     a delta base. Use this to recompute delta suitable in censor or strip
                     context.
                     """
                     if target_rev is None:
                         target_rev = len(self.revlog)
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo, target_rev)
                     if excluded_bases is None:
                         excluded_bases = set()
                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo, target_rev)
                     gather_debug = (
                         self._write_debug is not None or self._debug_info is not None
                     )
                     debug_search = self._write_debug is not None and self._debug_search
                     if gather_debug:
                         start = util.timer()
                     # count the number of different delta we tried (for debug purpose)
                     dbg_try_count = 0
                     # count the number of "search round" we did. (for debug purpose)
                     dbg_try_rounds = 0
                     dbg_type = b'unknown'
                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog
                     deltainfo = None
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     if gather_debug:
                         if p1r != nullrev:
                             p1_chain_len = revlog._chaininfo(p1r)[0]
                         else:
                             p1_chain_len = -1
                         if p2r != nullrev:
                             p2_chain_len = revlog._chaininfo(p2r)[0]
                         else:
                             p2_chain_len = -1
                     if debug_search:
                         msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
                         msg %= target_rev
                         self._write_debug(msg)
                     groups = _candidategroups(
                         self.revlog,
                         revinfo.textlen,
                         p1r,
                         p2r,
                         cachedelta,
                         excluded_bases,
                         target_rev,
                     )
                     candidaterevs = next(groups)
                     while candidaterevs is not None:
                         dbg_try_rounds += 1
                         if debug_search:
                             prev = None
                             if deltainfo is not None:
                                 prev = deltainfo.base
                             if (
                                 cachedelta is not None
                                 and len(candidaterevs) == 1
                                 and cachedelta[0] in candidaterevs
                             ):
                                 round_type = b"cached-delta"
                             elif p1 in candidaterevs or p2 in candidaterevs:
                                 round_type = b"parents"
                             elif prev is not None and all(c < prev for c in candidaterevs):
                                 round_type = b"refine-down"
                             elif prev is not None and all(c > prev for c in candidaterevs):
                                 round_type = b"refine-up"
                             else:
                                 round_type = b"search-down"
                             msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                             msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                             self._write_debug(msg)
                         nominateddeltas = []
                         if deltainfo is not None:
                             if debug_search:
                                 msg = (
                                     b"DBG-DELTAS-SEARCH:   CONTENDER: rev=%d - length=%d\n"
                                 )
                                 msg %= (deltainfo.base, deltainfo.deltalen)
                                 self._write_debug(msg)
                             # if we already found a good delta,
                             # challenge it against refined candidates
                             nominateddeltas.append(deltainfo)
                         for candidaterev in candidaterevs:
                             if debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:   CANDIDATE: rev=%d\n"
                                 msg %= candidaterev
                                 self._write_debug(msg)
                                 candidate_type = None
                                 if candidaterev == p1:
                                     candidate_type = b"p1"
                                 elif candidaterev == p2:
                                     candidate_type = b"p2"
                                 elif self.revlog.issnapshot(candidaterev):
                                     candidate_type = b"snapshot-%d"
                                     candidate_type %= self.revlog.snapshotdepth(
                                         candidaterev
                                     )
                                 if candidate_type is not None:
                                     msg = b"DBG-DELTAS-SEARCH:     type=%s\n"
                                     msg %= candidate_type
                                     self._write_debug(msg)
                                 msg = b"DBG-DELTAS-SEARCH:     size=%d\n"
                                 msg %= self.revlog.length(candidaterev)
                                 self._write_debug(msg)
                                 msg = b"DBG-DELTAS-SEARCH:     base=%d\n"
                                 msg %= self.revlog.deltaparent(candidaterev)
                                 self._write_debug(msg)
                             dbg_try_count += 1
                             if debug_search:
                                 delta_start = util.timer()
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if debug_search:
                                 delta_end = util.timer()
                                 msg = b"DBG-DELTAS-SEARCH:     delta-search-time=%f\n"
                                 msg %= delta_end - delta_start
                                 self._write_debug(msg)
                             if candidatedelta is not None:
                                 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                     if debug_search:
                                         msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (GOOD)\n"
                                         msg %= candidatedelta.deltalen
                                         self._write_debug(msg)
                                     nominateddeltas.append(candidatedelta)
                                 elif debug_search:
                                     msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (BAD)\n"
                                     msg %= candidatedelta.deltalen
                                     self._write_debug(msg)
                             elif debug_search:
                                 msg = b"DBG-DELTAS-SEARCH:     NO-DELTA\n"
                                 self._write_debug(msg)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                         if deltainfo is not None:
                             candidaterevs = groups.send(deltainfo.base)
                         else:
                             candidaterevs = next(groups)
                     if deltainfo is None:
                         dbg_type = b"full"
                         deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
                     elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
                         dbg_type = b"snapshot"
                     else:
                         dbg_type = b"delta"
                     if gather_debug:
                         end = util.timer()
                         used_cached = (
                             cachedelta is not None
                             and dbg_try_rounds == 1
                             and dbg_try_count == 1
                             and deltainfo.base == cachedelta[0]
                         )
                         dbg = {
                             'duration': end - start,
                             'revision': target_rev,
                             'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                             'search_round_count': dbg_try_rounds,
                             'using-cached-base': used_cached,
                             'delta_try_count': dbg_try_count,
                             'type': dbg_type,
                             'p1-chain-len': p1_chain_len,
                             'p2-chain-len': p2_chain_len,
                         }
                         if (
                             deltainfo.snapshotdepth  # pytype: disable=attribute-error
                             is not None
                         ):
                             dbg[
                                 'snapshot-depth'
                             ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
                         else:
                             dbg['snapshot-depth'] = 0
                         target_revlog = b"UNKNOWN"
                         target_type = self.revlog.target[0]
                         target_key = self.revlog.target[1]
                         if target_type == KIND_CHANGELOG:
                             target_revlog = b'CHANGELOG:'
                         elif target_type == KIND_MANIFESTLOG:
                             target_revlog = b'MANIFESTLOG:'
                             if target_key:
                                 target_revlog += b'%s:' % target_key
                         elif target_type == KIND_FILELOG:
                             target_revlog = b'FILELOG:'
                             if target_key:
                                 target_revlog += b'%s:' % target_key
                         dbg['target-revlog'] = target_revlog
                         if self._debug_info is not None:
                             self._debug_info.append(dbg)
                         if self._write_debug is not None:
                             msg = (
                                 b"DBG-DELTAS:"
                                 b" %-12s"
                                 b" rev=%d:"
                                 b" delta-base=%d"
                                 b" is-cached=%d"
                                 b" - search-rounds=%d"
                                 b" try-count=%d"
                                 b" - delta-type=%-6s"
                                 b" snap-depth=%d"
                                 b" - p1-chain-length=%d"
                                 b" p2-chain-length=%d"
                                 b" - duration=%f"
                                 b"\n"
                             )
                             msg %= (
                                 dbg["target-revlog"],
                                 dbg["revision"],
                                 dbg["delta-base"],
                                 dbg["using-cached-base"],
                                 dbg["search_round_count"],
                                 dbg["delta_try_count"],
                                 dbg["type"],
                                 dbg["snapshot-depth"],
                                 dbg["p1-chain-len"],
                                 dbg["p2-chain-len"],
                                 dbg["duration"],
                             )
                             self._write_debug(msg)
                     return deltainfo
             def delta_compression(default_compression_header, deltainfo):
                 """return (COMPRESSION_MODE, deltainfo)
                 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
                 compression.
                 """
                 h, d = deltainfo.data
                 compression_mode = COMP_MODE_INLINE
                 if not h and not d:
                     # not data to store at all... declare them uncompressed
                     compression_mode = COMP_MODE_PLAIN
                 elif not h:
                     t = d[0:1]
                     if t == b'\0':
                         compression_mode = COMP_MODE_PLAIN
                     elif t == default_compression_header:
                         compression_mode = COMP_MODE_DEFAULT
                 elif h == b'u':
                     # we have a more efficient way to declare uncompressed
                     h = b''
                     compression_mode = COMP_MODE_PLAIN
                     deltainfo = drop_u_compression(deltainfo)
                 return compression_mode, deltainfo