upstream/mercurial-mirror Commit - r39371:1441eb38

1

# revlogdeltas.py - Logic around delta computation for revlog

1

# revlogdeltas.py - Logic around delta computation for revlog

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

"""Helper class to compute deltas stored inside revlogs"""

8

"""Helper class to compute deltas stored inside revlogs"""

9

10

from __future__ import absolute_import

10

from __future__ import absolute_import

11

12

import heapq

12

import heapq

13

import struct

13

import struct

14

15

# import stuff from node for others to import from revlog

15

# import stuff from node for others to import from revlog

16

from ..node import (

16

from ..node import (

17

nullrev,

17

nullrev,

18

)

18

)

19

from ..i18n import _

19

from ..i18n import _

20

21

from .constants import (

21

from .constants import (

22

REVIDX_ISCENSORED,

22

REVIDX_ISCENSORED,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

23

REVIDX_RAWTEXT_CHANGING_FLAGS,

24

)

24

)

25

26

from ..thirdparty import (

26

from ..thirdparty import (

27

attr,

27

attr,

28

)

28

)

29

30

from .. import (

30

from .. import (

31

error,

31

error,

32

mdiff,

32

mdiff,

33

)

33

)

34

35

RevlogError = error.RevlogError

35

RevlogError = error.RevlogError

36

CensoredNodeError = error.CensoredNodeError

36

CensoredNodeError = error.CensoredNodeError

37

38

# maximum <delta-chain-data>/<revision-text-length> ratio

38

# maximum <delta-chain-data>/<revision-text-length> ratio

39

LIMIT_DELTA2TEXT = 2

39

LIMIT_DELTA2TEXT = 2

40

41

class _testrevlog(object):
    """minimalist fake revlog to use in doctests

    Only the pieces of the revlog API that the slicing helpers of this
    module rely on are provided: ``start``, ``end``, ``length``, ``__len__``
    and the two sparse-read tuning attributes.
    """

    def __init__(self, data, density=0.5, mingap=0):
        """data is a list of revision payload boundaries

        ``data[rev]`` is the end offset of revision ``rev``; a revision
        starts where the previous one ends (revision 0 starts at offset 0).
        ``density`` and ``mingap`` are stored as ``_srdensitythreshold`` and
        ``_srmingapsize`` respectively.
        """
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        """return the start offset of ``rev``"""
        if rev == 0:
            return 0
        # a revision begins right where its predecessor ends
        return self._data[rev - 1]

    def end(self, rev):
        """return the end offset of ``rev``"""
        return self._data[rev]

    def length(self, rev):
        """return the payload size of ``rev`` (0 for an empty revision)"""
        return self.end(rev) - self.start(rev)

    def __len__(self):
        """return the number of revisions"""
        return len(self._data)

63

64

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be yielded individually.

    `deltainfo` and `targetsize` are mutually exclusive; passing both raises
    `error.ProgrammingError`.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size used by density slicing
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if not (deltainfo is None or targetsize is None):
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    # first cut on density, then cut each dense chunk down to the target size
    for chunk in _slicechunktodensity(revlog, revs,
                                      deltainfo,
                                      revlog._srdensitythreshold,
                                      revlog._srmingapsize):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

131

132

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlog. This
    might happen when "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    When `targetsize` is None, or when the whole of `revs` already fits in
    `targetsize`, `revs` is yielded unchanged.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    # [startrevidx, endrevidx] is the window of `revs` indices currently
    # being accumulated; startdata is the byte offset where it begins.
    startrevidx = 0
    startdata = revlog.start(revs[0])
    endrevidx = 0
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        if span <= targetsize:
            # still fits: extend the current window
            endrevidx = idx
        else:
            # adding `r` would overflow: flush the window and restart at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx
    # flush whatever remains after the last cut
    yield _trimchunk(revlog, revs, startrevidx)

208

209

def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    # the delta candidate (if any) would be stored as revision `nextrev`,
    # right after the current end of the revlog data
    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    if deltainfo is not None and deltainfo.deltalen:
        # work on a copy so the caller's sequence is left untouched
        revs = list(revs)
        revs.append(nextrev)

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            # the not-yet-written delta candidate
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                # sizes are negated so the min-heap pops the largest gap first
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    cutindices = []
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)
        cutindices.append(gapidx)

        # skipping this gap removes its size from the amount of data read
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices, in increasing index order
    cutindices.sort()
    previdx = 0
    for idx in cutindices:
        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

352

353

def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    ``endidx`` defaults to ``len(revs)``. Nothing is trimmed when the last
    selected entry is not an existing revision of ``revlog`` (i.e. its number
    is ``>= len(revlog)``).

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (endidx > 1
               and endidx > startidx
               and length(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

405

406

def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    When ``deltainfo`` is given and the last entry of ``revs`` is not an
    existing revision (its number is ``>= len(revlog)``), that entry is
    treated as ``deltainfo.deltalen`` bytes located right after the current
    end of the revlog data.

    An empty ``revs`` has a span of 0.

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        if len(revs) == 1:
            # the segment is the delta candidate alone
            return deltainfo.deltalen
        # the candidate would be written right after the last stored revision
        offset = revlog.end(len(revlog) - 1)
        end = deltainfo.deltalen + offset
    else:
        end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])

440

441

def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is handed to ``revlog.revision`` as ``_df`` when the base text has
    to be read. ``flags`` are run through ``revlog._processflags`` and, when
    that requests it, the result is checked against ``expectednode`` with
    parents ``p1`` and ``p2``.

    Raises RevlogError when ``flags`` carry REVIDX_ISCENSORED but processing
    the text did not raise CensoredNodeError. A CensoredNodeError is
    re-raised unless ``flags`` carry REVIDX_ISCENSORED.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # flagged as censored, yet nothing above complained about it
            raise RevlogError(_('node %s is not censored') % expectednode)
    except CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

468

469

@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """immutable record describing a candidate delta for a new revision"""
    # distance from the base revision; used as the span of the delta chain
    distance = attr.ib()
    # size of the candidate delta itself
    deltalen = attr.ib()
    # NOTE(review): presumably the delta payload -- confirm against callers
    data = attr.ib()
    # revision the delta applies to (may be nullrev)
    base = attr.ib()
    # NOTE(review): presumably the first revision of the delta chain -- confirm
    chainbase = attr.ib()
    # NOTE(review): presumably the number of deltas in the chain -- confirm
    chainlen = attr.ib()
    # sum of the total size of the deltas that need to be applied
    compresseddeltalen = attr.ib()
    # NOTE(review): presumably the depth in the snapshot hierarchy -- confirm
    snapshotdepth = attr.ib()

479

480

def isgooddeltainfo(revlog, deltainfo, revinfo):

480

def isgooddeltainfo(revlog, deltainfo, revinfo):

481

"""Returns True if the given delta is good. Good means that it is within

481

"""Returns True if the given delta is good. Good means that it is within

482

the disk span, disk size, and chain length bounds that we know to be

482

the disk span, disk size, and chain length bounds that we know to be

483

performant."""

483

performant."""

484

if deltainfo is None:

484

if deltainfo is None:

485

return False

485

return False

486

487

# - 'deltainfo.distance' is the distance from the base revision --

487

# - 'deltainfo.distance' is the distance from the base revision --

488

# bounding it limits the amount of I/O we need to do.

488

# bounding it limits the amount of I/O we need to do.

489

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

489

# - 'deltainfo.compresseddeltalen' is the sum of the total size of

490

# deltas we need to apply -- bounding it limits the amount of CPU

490

# deltas we need to apply -- bounding it limits the amount of CPU

491

# we consume.

491

# we consume.

492

493

if revlog._sparserevlog:

493

if revlog._sparserevlog:

494

# As sparse-read will be used, we can consider that the distance,

494

# As sparse-read will be used, we can consider that the distance,

495

# instead of being the span of the whole chunk,

495

# instead of being the span of the whole chunk,

496

# is the span of the largest read chunk

496

# is the span of the largest read chunk

497

base = deltainfo.base

497

base = deltainfo.base

498

499

if base != nullrev:

499

if base != nullrev:

500

deltachain = revlog._deltachain(base)[0]

500

deltachain = revlog._deltachain(base)[0]

501

else:

501

else:

502

deltachain = []

502

deltachain = []

503

504

# search for the first non-snapshot revision

504

# search for the first non-snapshot revision

505

for idx, r in enumerate(deltachain):

505

for idx, r in enumerate(deltachain):

506

if not revlog.issnapshot(r):

506

if not revlog.issnapshot(r):

507

break

507

break

508

deltachain = deltachain[idx:]

508

deltachain = deltachain[idx:]

509

chunks = slicechunk(revlog, deltachain, deltainfo)

509

chunks = slicechunk(revlog, deltachain, deltainfo)

510

all_span = [segmentspan(revlog, revs, deltainfo)

510

all_span = [segmentspan(revlog, revs, deltainfo)

511

for revs in chunks]

511

for revs in chunks]

512

distance = max(all_span)

512

distance = max(all_span)

513

else:

513

else:

514

distance = deltainfo.distance

514

distance = deltainfo.distance

515

516

textlen = revinfo.textlen

516

textlen = revinfo.textlen

517

defaultmax = textlen * 4

517

defaultmax = textlen * 4

518

maxdist = revlog._maxdeltachainspan

518

maxdist = revlog._maxdeltachainspan

519

if not maxdist:

519

if not maxdist:

520

maxdist = distance # ensure the conditional pass

520

maxdist = distance # ensure the conditional pass

521

maxdist = max(maxdist, defaultmax)

521

maxdist = max(maxdist, defaultmax)

522

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

522

if revlog._sparserevlog and maxdist < revlog._srmingapsize:

523

# In multiple places, we are ignoring irrelevant data ranges below a

523

# In multiple places, we are ignoring irrelevant data ranges below a

524

# certain size. We also apply this tradeoff here and relax the span

524

# certain size. We also apply this tradeoff here and relax the span

525

# constraint for small enough content.

525

# constraint for small enough content.

526

maxdist = revlog._srmingapsize

526

maxdist = revlog._srmingapsize

527

528

# Bad delta from read span:

528

# Bad delta from read span:

529

#

529

#

530

# If the span of data read is larger than the maximum allowed.

530

# If the span of data read is larger than the maximum allowed.

531

if maxdist < distance:

531

if maxdist < distance:

532

return False

532

return False

533

534

# Bad delta from new delta size:

534

# Bad delta from new delta size:

535

#

535

#

536

# If the delta size is larger than the target text, storing the

536

# If the delta size is larger than the target text, storing the

537

# delta will be inefficient.

537

# delta will be inefficient.

538

if textlen < deltainfo.deltalen:

538

if textlen < deltainfo.deltalen:

539

return False

539

return False

540

541

# Bad delta from cumulated payload size:

541

# Bad delta from cumulated payload size:

542

#

542

#

543

# If the sum of delta get larger than K * target text length.

543

# If the sum of delta get larger than K * target text length.

544

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

544

if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:

545

return False

545

return False

546

547

# Bad delta from chain length:

547

# Bad delta from chain length:

548

#

548

#

549

# If the number of delta in the chain gets too high.

549

# If the number of delta in the chain gets too high.

550

if (revlog._maxchainlen

550

if (revlog._maxchainlen

551

and revlog._maxchainlen < deltainfo.chainlen):

551

and revlog._maxchainlen < deltainfo.chainlen):

552

return False

552

return False

553

554

# bad delta from intermediate snapshot size limit

554

# bad delta from intermediate snapshot size limit

555

#

555

#

556

# If an intermediate snapshot size is higher than the limit. The

556

# If an intermediate snapshot size is higher than the limit. The

557

# limit exists to prevent an endless chain of intermediate deltas from

557

# limit exists to prevent an endless chain of intermediate deltas from

558

# being created.

558

# being created.

559

if (deltainfo.snapshotdepth is not None and

559

if (deltainfo.snapshotdepth is not None and

560

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

560

(textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):

561

return False

561

return False

562

563

# bad delta if new intermediate snapshot is larger than the previous

563

# bad delta if new intermediate snapshot is larger than the previous

564

# snapshot

564

# snapshot

565

if (deltainfo.snapshotdepth

565

if (deltainfo.snapshotdepth

566

and revlog.length(deltainfo.base) < deltainfo.deltalen):

566

and revlog.length(deltainfo.base) < deltainfo.deltalen):

567

return False

567

return False

568

569

return True

569

return True

570

571

def _candidategroups(revlog, p1, p2, cachedelta):

571

def _candidategroups(revlog, p1, p2, cachedelta):

572

"""

572

"""

573

Provides revisions that present an interest to be diffed against,

573

Provides revisions that present an interest to be diffed against,

574

grouped by level of easiness.

574

grouped by level of easiness.

575

"""

575

"""

576

gdelta = revlog._generaldelta

576

gdelta = revlog._generaldelta

577

curr = len(revlog)

577

curr = len(revlog)

578

prev = curr - 1

578

prev = curr - 1

579

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

580

579

581

# should we try to build a delta?

580

# should we try to build a delta?

582

if prev != nullrev and revlog._storedeltachains:

581

if prev != nullrev and revlog._storedeltachains:

583

tested = set()

582

tested = set()

584

# This condition is true most of the time when processing

583

# This condition is true most of the time when processing

585

# changegroup data into a generaldelta repo. The only time it

584

# changegroup data into a generaldelta repo. The only time it

586

# isn't true is if this is the first revision in a delta chain

585

# isn't true is if this is the first revision in a delta chain

587

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

586

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

588

if cachedelta and gdelta and revlog._lazydeltabase:

587

if cachedelta and gdelta and revlog._lazydeltabase:

589

# Assume what we received from the server is a good choice

588

# Assume what we received from the server is a good choice

590

# build delta will reuse the cache

589

# build delta will reuse the cache

591

yield (cachedelta[0],)

590

yield (cachedelta[0],)

592

tested.add(cachedelta[0])

591

tested.add(cachedelta[0])

593

592

594

if gdelta:

593

if gdelta:

595

# exclude already lazy tested base if any

594

# exclude already lazy tested base if any

596

parents = [p for p in (p1r, p2r)

595

parents = [p for p in (p1, p2)

597

if p != nullrev and p not in tested]

596

if p != nullrev and p not in tested]

598

597

599

if not revlog._deltabothparents and len(parents) == 2:

598

if not revlog._deltabothparents and len(parents) == 2:

600

parents.sort()

599

parents.sort()

601

# To minimize the chance of having to build a fulltext,

600

# To minimize the chance of having to build a fulltext,

602

# pick first whichever parent is closest to us (max rev)

601

# pick first whichever parent is closest to us (max rev)

603

yield (parents[1],)

602

yield (parents[1],)

604

# then the other one (min rev) if the first did not fit

603

# then the other one (min rev) if the first did not fit

605

yield (parents[0],)

604

yield (parents[0],)

606

tested.update(parents)

605

tested.update(parents)

607

elif len(parents) > 0:

606

elif len(parents) > 0:

608

# Test all parents (1 or 2), and keep the best candidate

607

# Test all parents (1 or 2), and keep the best candidate

609

yield parents

608

yield parents

610

tested.update(parents)

609

tested.update(parents)

611

610

612

if prev not in tested:

611

if prev not in tested:

613

# other approach failed try against prev to hopefully save us a

612

# other approach failed try against prev to hopefully save us a

614

# fulltext.

613

# fulltext.

615

yield (prev,)

614

yield (prev,)

616

tested.add(prev)

615

tested.add(prev)

617

616

618

class deltacomputer(object):

617

class deltacomputer(object):

619

def __init__(self, revlog):

618

def __init__(self, revlog):

620

self.revlog = revlog

619

self.revlog = revlog

621

620

622

def buildtext(self, revinfo, fh):

621

def buildtext(self, revinfo, fh):

623

"""Builds a fulltext version of a revision

622

"""Builds a fulltext version of a revision

624

623

625

revinfo: _revisioninfo instance that contains all needed info

624

revinfo: _revisioninfo instance that contains all needed info

626

fh: file handle to either the .i or the .d revlog file,

625

fh: file handle to either the .i or the .d revlog file,

627

depending on whether it is inlined or not

626

depending on whether it is inlined or not

628

"""

627

"""

629

btext = revinfo.btext

628

btext = revinfo.btext

630

if btext[0] is not None:

629

if btext[0] is not None:

631

return btext[0]

630

return btext[0]

632

631

633

revlog = self.revlog

632

revlog = self.revlog

634

cachedelta = revinfo.cachedelta

633

cachedelta = revinfo.cachedelta

635

baserev = cachedelta[0]

634

baserev = cachedelta[0]

636

delta = cachedelta[1]

635

delta = cachedelta[1]

637

636

638

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

637

fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,

639

revinfo.p1, revinfo.p2,

638

revinfo.p1, revinfo.p2,

640

revinfo.flags, revinfo.node)

639

revinfo.flags, revinfo.node)

641

return fulltext

640

return fulltext

642

641

643

def _builddeltadiff(self, base, revinfo, fh):

642

def _builddeltadiff(self, base, revinfo, fh):

644

revlog = self.revlog

643

revlog = self.revlog

645

t = self.buildtext(revinfo, fh)

644

t = self.buildtext(revinfo, fh)

646

if revlog.iscensored(base):

645

if revlog.iscensored(base):

647

# deltas based on a censored revision must replace the

646

# deltas based on a censored revision must replace the

648

# full content in one patch, so delta works everywhere

647

# full content in one patch, so delta works everywhere

649

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

648

header = mdiff.replacediffheader(revlog.rawsize(base), len(t))

650

delta = header + t

649

delta = header + t

651

else:

650

else:

652

ptext = revlog.revision(base, _df=fh, raw=True)

651

ptext = revlog.revision(base, _df=fh, raw=True)

653

delta = mdiff.textdiff(ptext, t)

652

delta = mdiff.textdiff(ptext, t)

654

653

655

return delta

654

return delta

656

655

657

def _builddeltainfo(self, revinfo, base, fh):

656

def _builddeltainfo(self, revinfo, base, fh):

658

# can we use the cached delta?

657

# can we use the cached delta?

659

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

658

if revinfo.cachedelta and revinfo.cachedelta[0] == base:

660

delta = revinfo.cachedelta[1]

659

delta = revinfo.cachedelta[1]

661

else:

660

else:

662

delta = self._builddeltadiff(base, revinfo, fh)

661

delta = self._builddeltadiff(base, revinfo, fh)

663

revlog = self.revlog

662

revlog = self.revlog

664

header, data = revlog.compress(delta)

663

header, data = revlog.compress(delta)

665

deltalen = len(header) + len(data)

664

deltalen = len(header) + len(data)

666

chainbase = revlog.chainbase(base)

665

chainbase = revlog.chainbase(base)

667

offset = revlog.end(len(revlog) - 1)

666

offset = revlog.end(len(revlog) - 1)

668

dist = deltalen + offset - revlog.start(chainbase)

667

dist = deltalen + offset - revlog.start(chainbase)

669

if revlog._generaldelta:

668

if revlog._generaldelta:

670

deltabase = base

669

deltabase = base

671

else:

670

else:

672

deltabase = chainbase

671

deltabase = chainbase

673

chainlen, compresseddeltalen = revlog._chaininfo(base)

672

chainlen, compresseddeltalen = revlog._chaininfo(base)

674

chainlen += 1

673

chainlen += 1

675

compresseddeltalen += deltalen

674

compresseddeltalen += deltalen

676

675

677

revlog = self.revlog

676

revlog = self.revlog

678

snapshotdepth = None

677

snapshotdepth = None

679

if deltabase == nullrev:

678

if deltabase == nullrev:

680

snapshotdepth = 0

679

snapshotdepth = 0

681

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

680

elif revlog._sparserevlog and revlog.issnapshot(deltabase):

682

# A delta chain should always be one full snapshot,

681

# A delta chain should always be one full snapshot,

683

# zero or more semi-snapshots, and zero or more deltas

682

# zero or more semi-snapshots, and zero or more deltas

684

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

683

p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)

685

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

684

if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):

686

snapshotdepth = len(revlog._deltachain(deltabase)[0])

685

snapshotdepth = len(revlog._deltachain(deltabase)[0])

687

686

688

return _deltainfo(dist, deltalen, (header, data), deltabase,

687

return _deltainfo(dist, deltalen, (header, data), deltabase,

689

chainbase, chainlen, compresseddeltalen,

688

chainbase, chainlen, compresseddeltalen,

690

snapshotdepth)

689

snapshotdepth)

691

690

692

def _fullsnapshotinfo(self, fh, revinfo):

691

def _fullsnapshotinfo(self, fh, revinfo):

693

curr = len(self.revlog)

692

curr = len(self.revlog)

694

rawtext = self.buildtext(revinfo, fh)

693

rawtext = self.buildtext(revinfo, fh)

695

data = self.revlog.compress(rawtext)

694

data = self.revlog.compress(rawtext)

696

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

695

compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])

697

deltabase = chainbase = curr

696

deltabase = chainbase = curr

698

snapshotdepth = 0

697

snapshotdepth = 0

699

chainlen = 1

698

chainlen = 1

700

699

701

return _deltainfo(dist, deltalen, data, deltabase,

700

return _deltainfo(dist, deltalen, data, deltabase,

702

chainbase, chainlen, compresseddeltalen,

701

chainbase, chainlen, compresseddeltalen,

703

snapshotdepth)

702

snapshotdepth)

704

703

705

def finddeltainfo(self, revinfo, fh):

704

def finddeltainfo(self, revinfo, fh):

706

"""Find an acceptable delta against a candidate revision

705

"""Find an acceptable delta against a candidate revision

707

706

708

revinfo: information about the revision (instance of _revisioninfo)

707

revinfo: information about the revision (instance of _revisioninfo)

709

fh: file handle to either the .i or the .d revlog file,

708

fh: file handle to either the .i or the .d revlog file,

710

depending on whether it is inlined or not

709

depending on whether it is inlined or not

711

710

712

Returns the first acceptable candidate revision, as ordered by

711

Returns the first acceptable candidate revision, as ordered by

713

_candidategroups

712

_candidategroups

714

713

715

If no suitable deltabase is found, we return delta info for a full

714

If no suitable deltabase is found, we return delta info for a full

716

snapshot.

715

snapshot.

717

"""

716

"""

718

if not revinfo.textlen:

717

if not revinfo.textlen:

719

return self._fullsnapshotinfo(fh, revinfo)

718

return self._fullsnapshotinfo(fh, revinfo)

720

719

721

# no delta for flag processor revision (see "candelta" for why)

720

# no delta for flag processor revision (see "candelta" for why)

722

# not calling candelta since only one revision needs test, also to

721

# not calling candelta since only one revision needs test, also to

723

# avoid overhead fetching flags again.

722

# avoid overhead fetching flags again.

724

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

723

if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:

725

return self._fullsnapshotinfo(fh, revinfo)

724

return self._fullsnapshotinfo(fh, revinfo)

726

725

727

cachedelta = revinfo.cachedelta

726

cachedelta = revinfo.cachedelta

728

p1 = revinfo.p1

727

p1 = revinfo.p1

729

p2 = revinfo.p2

728

p2 = revinfo.p2

730

revlog = self.revlog

729

revlog = self.revlog

731

730

732

deltalength = self.revlog.length

731

deltalength = self.revlog.length

733

deltaparent = self.revlog.deltaparent

732

deltaparent = self.revlog.deltaparent

734

733

735

deltainfo = None

734

deltainfo = None

736

deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT

735

deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT

737

groups = _candidategroups(self.revlog, p1, p2, cachedelta)

736

p1r, p2r = revlog.rev(p1), revlog.rev(p2)

737

groups = _candidategroups(self.revlog, p1r, p2r, cachedelta)

738

for candidaterevs in groups:

738

for candidaterevs in groups:

739

# filter out delta base that will never produce good delta

739

# filter out delta base that will never produce good delta

740

candidaterevs = [r for r in candidaterevs

740

candidaterevs = [r for r in candidaterevs

741

if self.revlog.length(r) <= deltas_limit]

741

if self.revlog.length(r) <= deltas_limit]

742

nominateddeltas = []

742

nominateddeltas = []

743

for candidaterev in candidaterevs:

743

for candidaterev in candidaterevs:

744

# skip over empty delta (no need to include them in a chain)

744

# skip over empty delta (no need to include them in a chain)

745

while candidaterev != nullrev and not deltalength(candidaterev):

745

while candidaterev != nullrev and not deltalength(candidaterev):

746

candidaterev = deltaparent(candidaterev)

746

candidaterev = deltaparent(candidaterev)

747

# no need to try a delta against nullid, this will be handled

747

# no need to try a delta against nullid, this will be handled

748

# by fulltext later.

748

# by fulltext later.

749

if candidaterev == nullrev:

749

if candidaterev == nullrev:

750

continue

750

continue

751

# no delta for rawtext-changing revs (see "candelta" for why)

751

# no delta for rawtext-changing revs (see "candelta" for why)

752

if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

752

if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:

753

continue

753

continue

754

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

754

candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)

755

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

755

if isgooddeltainfo(self.revlog, candidatedelta, revinfo):

756

nominateddeltas.append(candidatedelta)

756

nominateddeltas.append(candidatedelta)

757

if nominateddeltas:

757

if nominateddeltas:

758

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

758

deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)

759

break

759

break

760

761

if deltainfo is None:

761

if deltainfo is None:

762

deltainfo = self._fullsnapshotinfo(fh, revinfo)

762

deltainfo = self._fullsnapshotinfo(fh, revinfo)

763

return deltainfo

763

return deltainfo

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlogdeltas.py - Logic around delta computation for revlog
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             # Copyright 2018 Octobus <contact@octobus.net>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Helper class to compute deltas stored inside revlogs"""
             from __future__ import absolute_import
             import heapq
             import struct
             # import stuff from node for others to import from revlog
             from ..node import (
                 nullrev,
             )
             from ..i18n import _
             from .constants import (
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 mdiff,
             )
             RevlogError = error.RevlogError
             CensoredNodeError = error.CensoredNodeError
             # maximum <delta-chain-data>/<revision-text-length> ratio
             LIMIT_DELTA2TEXT = 2
             class _testrevlog(object):
                 """minimalist fake revlog to use in doctests"""
                 def __init__(self, data, density=0.5, mingap=0):
                     """data is an list of revision payload boundaries"""
                     self._data = data
                     self._srdensitythreshold = density
                     self._srmingapsize = mingap
                 def start(self, rev):
                     if rev == 0:
                         return 0
                     return self._data[rev - 1]
                 def end(self, rev):
                     return self._data[rev]
                 def length(self, rev):
                     return self.end(rev) - self.start(rev)
                 def __len__(self):
                     return len(self._data)
             def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
                 """split ``revs`` into groups that should each be read in one go

                 The purpose is to reduce the amount of unrelated data read from disk.
                 ``revs`` is assumed to be sorted.

                 ``revs`` is first sliced until the overall density
                 (payload/chunks-span ratio) is above `revlog._srdensitythreshold`.
                 No gap smaller than `revlog._srmingapsize` is skipped.

                 If `targetsize` is set, no chunk larger than `targetsize` will be
                 yielded. For consistency with the other slicing choices, this limit
                 never goes below `revlog._srmingapsize`.

                 If an individual revision chunk is larger than this limit, it is
                 still yielded on its own.

                 `targetsize` and `deltainfo` are mutually exclusive; passing both
                 raises a ProgrammingError.

                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])

                 >>> list(slicechunk(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(slicechunk(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]

                 Slicing with a maximum chunk size

                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
                 [[0], [11], [13], [15]]
                 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
                 [[0], [11], [13, 15]]
                 """
                 # the size limit is never allowed below the minimal gap size
                 if targetsize is not None:
                     targetsize = max(targetsize, revlog._srmingapsize)

                 # The two options serve different purposes and cannot be combined:
                 # * targetsize keeps reads within specification,
                 # * deltainfo is used to pick a good delta chain when writing.
                 if deltainfo is not None and targetsize is not None:
                     msg = 'cannot use `targetsize` with a `deltainfo`'
                     raise error.ProgrammingError(msg)

                 densechunks = _slicechunktodensity(revlog, revs,
                                                    deltainfo,
                                                    revlog._srdensitythreshold,
                                                    revlog._srmingapsize)
                 for densechunk in densechunks:
                     # second pass: enforce the size limit on each dense chunk
                     for piece in _slicechunktosize(revlog, densechunk, targetsize):
                         yield piece
             def _slicechunktosize(revlog, revs, targetsize=None):
                 """cut ``revs`` into chunks whose span fits within ``targetsize``

                 This is intended for chunks selected by density slicing that are
                 still too large compared to the read guarantee of revlog. This might
                 happen when the "minimal gap size" interrupted the slicing, or when
                 chains are built in a way that creates large blocks next to each
                 other.

                 When ``targetsize`` is None, ``revs`` is yielded unchanged.

                 >>> revlog = _testrevlog([
                 ...  3,  #0 (3)
                 ...  5,  #1 (2)
                 ...  6,  #2 (1)
                 ...  8,  #3 (2)
                 ...  8,  #4 (empty)
                 ...  11, #5 (3)
                 ...  12, #6 (1)
                 ...  13, #7 (1)
                 ...  14, #8 (1)
                 ... ])

                 Cases where chunk is already small enough

                 >>> list(_slicechunktosize(revlog, [0], 3))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], 3))
                 [[6, 7]]
                 >>> list(_slicechunktosize(revlog, [0], None))
                 [[0]]
                 >>> list(_slicechunktosize(revlog, [6, 7], None))
                 [[6, 7]]

                 cases where we need actual slicing

                 >>> list(_slicechunktosize(revlog, [0, 1], 3))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 3))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
                 [[1, 2], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
                 [[3], [5]]
                 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
                 [[5], [6, 7, 8]]
                 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
                 [[0], [1, 2], [3], [5], [6, 7, 8]]

                 Case with too large individual chunk (must return valid chunk)

                 >>> list(_slicechunktosize(revlog, [0, 1], 2))
                 [[0], [1]]
                 >>> list(_slicechunktosize(revlog, [1, 3], 1))
                 [[1], [3]]
                 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
                 [[3], [5]]
                 """
                 assert targetsize is None or 0 <= targetsize
                 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
                     # nothing to do: no limit, or the whole span already fits
                     yield revs
                     return

                 # [startrevidx, endrevidx] are the bounds (as indexes in ``revs``)
                 # of the chunk being accumulated; startdata is its first offset
                 startrevidx = 0
                 endrevidx = 0
                 startdata = revlog.start(revs[0])
                 for idx, r in enumerate(revs):
                     if idx == 0:
                         # the first revision already opened the current chunk
                         continue
                     if revlog.end(r) - startdata > targetsize:
                         # ``r`` does not fit: flush what was accumulated (the
                         # trimmed chunk may come back empty, in which case nothing
                         # is yielded) and open a new chunk starting at ``r``
                         chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
                         if chunk:
                             yield chunk
                         startrevidx = idx
                         startdata = revlog.start(r)
                     endrevidx = idx
                 # whatever remains forms the last chunk
                 yield _trimchunk(revlog, revs, startrevidx)
             def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                                      mingapsize=0):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 ``deltainfo`` is a _deltainfo instance of a revision that we would append
                 to the top of the revlog.
                 The initial chunk is sliced until the overall density (payload/chunks-span
                 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
                 skipped.
                 >>> revlog = _testrevlog([
                 ...  5,  #00 (5)
                 ...  10, #01 (5)
                 ...  12, #02 (2)
                 ...  12, #03 (empty)
                 ...  27, #04 (15)
                 ...  31, #05 (4)
                 ...  31, #06 (empty)
                 ...  42, #07 (11)
                 ...  47, #08 (5)
                 ...  47, #09 (empty)
                 ...  48, #10 (1)
                 ...  51, #11 (3)
                 ...  74, #12 (23)
                 ...  85, #13 (11)
                 ...  86, #14 (1)
                 ...  91, #15 (5)
                 ... ])
                 >>> list(_slicechunktodensity(revlog, list(range(16))))
                 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
                 >>> list(_slicechunktodensity(revlog, [0, 15]))
                 [[0], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
                 [[0], [11], [15]]
                 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
                 [[0], [11, 13, 15]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           mingapsize=20))
                 [[1, 2, 3, 5, 8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95))
                 [[1, 2], [5], [8, 10, 11], [14]]
                 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
                 ...                           targetdensity=0.95, mingapsize=12))
                 [[1, 2], [5, 8, 10, 11], [14]]
                 """
                 start = revlog.start
                 length = revlog.length
                 if len(revs) <= 1:
                     yield revs
                     return
                 nextrev = len(revlog)
                 nextoffset = revlog.end(nextrev - 1)
                 if deltainfo is None:
                     deltachainspan = segmentspan(revlog, revs)
                     chainpayload = sum(length(r) for r in revs)
                 else:
                     deltachainspan = deltainfo.distance
                     chainpayload = deltainfo.compresseddeltalen
                 if deltachainspan < mingapsize:
                     yield revs
                     return
                 readdata = deltachainspan
                 if deltachainspan:
                     density = chainpayload / float(deltachainspan)
                 else:
                     density = 1.0
                 if density >= targetdensity:
                     yield revs
                     return
                 if deltainfo is not None and deltainfo.deltalen:
                     revs = list(revs)
                     revs.append(nextrev)
                 # Store the gaps in a heap to have them sorted by decreasing size
                 gapsheap = []
                 heapq.heapify(gapsheap)
                 prevend = None
                 for i, rev in enumerate(revs):
                     if rev < nextrev:
                         revstart = start(rev)
                         revlen = length(rev)
                     else:
                         revstart = nextoffset
                         revlen = deltainfo.deltalen
                     # Skip empty revisions to form larger holes
                     if revlen == 0:
                         continue
                     if prevend is not None:
                         gapsize = revstart - prevend
                         # only consider holes that are large enough
                         if gapsize > mingapsize:
                             heapq.heappush(gapsheap, (-gapsize, i))
                     prevend = revstart + revlen
                 # Collect the indices of the largest holes until the density is acceptable
                 indicesheap = []
                 heapq.heapify(indicesheap)
                 while gapsheap and density < targetdensity:
                     oppgapsize, gapidx = heapq.heappop(gapsheap)
                     heapq.heappush(indicesheap, gapidx)
                     # the gap sizes are stored as negatives to be sorted decreasingly
                     # by the heap
                     readdata -= (-oppgapsize)
                     if readdata > 0:
                         density = chainpayload / float(readdata)
                     else:
                         density = 1.0
                 # Cut the revs at collected indices
                 previdx = 0
                 while indicesheap:
                     idx = heapq.heappop(indicesheap)
                     chunk = _trimchunk(revlog, revs, previdx, idx)
                     if chunk:
                         yield chunk
                     previdx = idx
                 chunk = _trimchunk(revlog, revs, previdx)
                 if chunk:
                     yield chunk
             def _trimchunk(revlog, revs, startidx, endidx=None):
                 """returns revs[startidx:endidx] without empty trailing revs
                 Doctest Setup
                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ...  21, #5
                 ...  21, #6 (empty)
                 ... ])
                 Contiguous cases:
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
                 [0, 1, 2, 3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
                 [0, 1, 2, 3, 4]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
                 [0, 1, 2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
                 [2]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
                 [3, 4, 5]
                 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
                 [3, 4]
                 Discontiguous cases:
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
                 [1, 3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
                 [1]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
                 [3, 5]
                 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
                 [3, 5]
                 """
                 length = revlog.length
                 if endidx is None:
                     endidx = len(revs)
                 # If we have a non-emtpy delta candidate, there are nothing to trim
                 if revs[endidx - 1] < len(revlog):
                     # Trim empty revs at the end, except the very first revision of a chain
                     while (endidx > 1
                             and endidx > startidx
                             and length(revs[endidx - 1]) == 0):
                         endidx -= 1
                 return revs[startidx:endidx]
             def segmentspan(revlog, revs, deltainfo=None):
                 """Return the number of bytes spanned by a segment of revisions.

                 ``revs`` is a sorted sequence of revision numbers; an empty sequence
                 spans 0 bytes.  When ``deltainfo`` is given and the last entry of
                 ``revs`` is not stored yet (its number is ``>= len(revlog)``), the
                 segment is considered to end ``deltainfo.deltalen`` bytes after the
                 current end of the revlog.

                 >>> revlog = _testrevlog([
                 ...  5,  #0
                 ...  10, #1
                 ...  12, #2
                 ...  12, #3 (empty)
                 ...  17, #4
                 ... ])

                 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
                 17
                 >>> segmentspan(revlog, [0, 4])
                 17
                 >>> segmentspan(revlog, [3, 4])
                 5
                 >>> segmentspan(revlog, [1, 2, 3,])
                 7
                 >>> segmentspan(revlog, [1, 3])
                 7
                 """
                 if not revs:
                     return 0

                 first, last = revs[0], revs[-1]
                 pending = deltainfo is not None and len(revlog) <= last

                 if not pending:
                     return revlog.end(last) - revlog.start(first)

                 # Only the pending revision is in the segment: its span is its size.
                 if len(revs) == 1:
                     return deltainfo.deltalen

                 tipend = revlog.end(len(revlog) - 1)
                 return deltainfo.deltalen + tipend - revlog.start(first)
             def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
                 """Rebuild a revision's full text from its delta base and its delta.

                 ``fh`` is handed to ``revlog.revision`` as ``_df`` when the base text
                 has to be read.  The text is passed through the revlog's 'read' flag
                 processors and, when they request it, checked against
                 ``expectednode`` using ``p1`` and ``p2``.

                 Raises ``RevlogError`` when ``flags`` carries ``REVIDX_ISCENSORED``
                 but processing the text did not raise ``CensoredNodeError``, and lets
                 ``CensoredNodeError`` propagate when the flag is not set.
                 """
                 headersize = struct.calcsize(">lll")
                 replaceheader = mdiff.replacediffheader(revlog.rawsize(baserev),
                                                         len(delta) - headersize)
                 if delta[:headersize] != replaceheader:
                     # deltabase is rawtext before changed by flag processors, which
                     # is equivalent to non-raw text
                     basetext = revlog.revision(baserev, _df=fh, raw=False)
                     fulltext = mdiff.patch(basetext, delta)
                 else:
                     # The delta replaces the entire base, so the base does not need
                     # to be decoded.  This neatly avoids censored bases, which throw
                     # when they are decoded.
                     fulltext = delta[headersize:]

                 try:
                     fulltext, validatehash = revlog._processflags(fulltext, flags,
                                                                   'read', raw=True)
                     if validatehash:
                         revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
                     if flags & REVIDX_ISCENSORED:
                         raise RevlogError(_('node %s is not censored') % expectednode)
                 except CensoredNodeError:
                     # must pass the censored index flag to add censored revisions
                     if not flags & REVIDX_ISCENSORED:
                         raise
                 return fulltext
             @attr.s(slots=True, frozen=True)
             class _deltainfo(object):
                 """Immutable description of one candidate way to store a revision.

                 Instances are built positionally by ``deltacomputer._builddeltainfo``
                 (delta against a base) and ``deltacomputer._fullsnapshotinfo`` (full
                 text), so the order of the fields below matters.
                 """
                 # Byte span from the start of the chain base to the end of the new
                 # entry once appended; equal to ``deltalen`` for a full snapshot.
                 distance = attr.ib()
                 # Size of the compressed entry: len(header) + len(data).
                 deltalen = attr.ib()
                 # The ``(header, data)`` pair returned by ``revlog.compress``.
                 data = attr.ib()
                 # Revision recorded as delta base (for a full snapshot, the number
                 # the new revision itself will get).
                 base = attr.ib()
                 # First revision of the delta chain the entry belongs to.
                 chainbase = attr.ib()
                 # Number of entries in the chain, the new one included.
                 chainlen = attr.ib()
                 # Sum of the compressed sizes of the chain, the new entry included.
                 compresseddeltalen = attr.ib()
                 # None for a regular delta, 0 for a full snapshot, otherwise the
                 # length of the base's delta chain (intermediate snapshot).
                 snapshotdepth = attr.ib()
             def isgooddeltainfo(revlog, deltainfo, revinfo):
                 """Returns True if the given delta is good. Good means that it is within
                 the disk span, disk size, and chain length bounds that we know to be
                 performant.

                 ``deltainfo`` is a ``_deltainfo`` instance (or None, which is never
                 good) and ``revinfo`` provides the ``textlen`` of the revision being
                 stored.
                 """
                 if deltainfo is None:
                     return False

                 # - 'deltainfo.distance' is the distance from the base revision --
                 #   bounding it limits the amount of I/O we need to do.
                 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
                 #   deltas we need to apply -- bounding it limits the amount of CPU
                 #   we consume.

                 if revlog._sparserevlog:
                     # As sparse-read will be used, we can consider that the distance,
                     # instead of being the span of the whole chunk,
                     # is the span of the largest read chunk
                     base = deltainfo.base

                     if base != nullrev:
                         deltachain = revlog._deltachain(base)[0]
                     else:
                         deltachain = []

                     # search for the first non-snapshot revision
                     #
                     # ``idx`` is pre-set so that an empty chain (``base`` is
                     # nullrev) does not leave it unbound for the slice below.
                     idx = 0
                     for idx, r in enumerate(deltachain):
                         if not revlog.issnapshot(r):
                             break
                     deltachain = deltachain[idx:]
                     chunks = slicechunk(revlog, deltachain, deltainfo)
                     all_span = [segmentspan(revlog, revs, deltainfo)
                                 for revs in chunks]
                     distance = max(all_span)
                 else:
                     distance = deltainfo.distance

                 textlen = revinfo.textlen
                 defaultmax = textlen * 4
                 maxdist = revlog._maxdeltachainspan
                 if not maxdist:
                     maxdist = distance # ensure the conditional pass
                 maxdist = max(maxdist, defaultmax)
                 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
                     # In multiple places, we are ignoring irrelevant data ranges
                     # below a certain size. Also apply this tradeoff here and relax
                     # the span constraint for small enough content.
                     maxdist = revlog._srmingapsize

                 # Bad delta from read span:
                 #
                 #   If the span of data read is larger than the maximum allowed.
                 if maxdist < distance:
                     return False

                 # Bad delta from new delta size:
                 #
                 #   If the delta size is larger than the target text, storing the
                 #   delta will be inefficient.
                 if textlen < deltainfo.deltalen:
                     return False

                 # Bad delta from cumulated payload size:
                 #
                 #   If the sum of delta get larger than K * target text length.
                 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
                     return False

                 # Bad delta from chain length:
                 #
                 #   If the number of delta in the chain gets too high.
                 if (revlog._maxchainlen
                         and revlog._maxchainlen < deltainfo.chainlen):
                     return False

                 # bad delta from intermediate snapshot size limit
                 #
                 #   If an intermediate snapshot size is higher than the limit.  The
                 #   limit exist to prevent endless chain of intermediate delta to be
                 #   created.
                 if (deltainfo.snapshotdepth is not None and
                         (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
                     return False

                 # bad delta if new intermediate snapshot is larger than the previous
                 # snapshot
                 if (deltainfo.snapshotdepth
                         and revlog.length(deltainfo.base) < deltainfo.deltalen):
                     return False

                 return True
             def _candidategroups(revlog, p1, p2, cachedelta):
                 """
                 Provides revisions that present an interest to be diffed against,
                 grouped by level of easiness.

                 ``p1`` and ``p2`` are the revision numbers of the parents (they are
                 compared against ``nullrev``).  ``cachedelta`` is either a false
                 value or a sequence whose first item is the revision a ready-made
                 delta applies to.

                 This is a generator: each yielded group is a tuple or list of
                 candidate base revisions, and no revision is yielded twice.
                 Nothing is yielded when the revlog is empty or does not store delta
                 chains.
                 """
                 gdelta = revlog._generaldelta
                 prev = len(revlog) - 1

                 # should we try to build a delta?
                 if prev != nullrev and revlog._storedeltachains:
                     tested = set()
                     # This condition is true most of the time when processing
                     # changegroup data into a generaldelta repo. The only time it
                     # isn't true is if this is the first revision in a delta chain
                     # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                     if cachedelta and gdelta and revlog._lazydeltabase:
                         # Assume what we received from the server is a good choice
                         # build delta will reuse the cache
                         yield (cachedelta[0],)
                         tested.add(cachedelta[0])

                     if gdelta:
                         # exclude already lazy tested base if any
                         parents = [p for p in (p1, p2)
                                    if p != nullrev and p not in tested]

                         if not revlog._deltabothparents and len(parents) == 2:
                             parents.sort()
                             # To minimize the chance of having to build a fulltext,
                             # pick first whichever parent is closest to us (max rev)
                             yield (parents[1],)
                             # then the other one (min rev) if the first did not fit
                             yield (parents[0],)
                             tested.update(parents)
                         elif len(parents) > 0:
                             # Test all parents (1 or 2), and keep the best candidate
                             yield parents
                             tested.update(parents)

                     if prev not in tested:
                         # other approach failed try against prev to hopefully save us a
                         # fulltext.
                         yield (prev,)
                         tested.add(prev)
             class deltacomputer(object):
                 """Compute how a new revision should be stored in a revlog.

                 The entry point is ``finddeltainfo``, which returns a ``_deltainfo``
                 describing either a delta against an existing revision or a full
                 snapshot.
                 """

                 def __init__(self, revlog):
                     self.revlog = revlog

                 def buildtext(self, revinfo, fh):
                     """Builds a fulltext version of a revision

                     revinfo: _revisioninfo instance that contains all needed info
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not

                     The text is cached in ``revinfo.btext[0]``; when it is already
                     there it is returned as is, otherwise it is rebuilt from
                     ``revinfo.cachedelta``.
                     """
                     btext = revinfo.btext
                     if btext[0] is not None:
                         return btext[0]

                     revlog = self.revlog
                     cachedelta = revinfo.cachedelta
                     baserev = cachedelta[0]
                     delta = cachedelta[1]

                     fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                                          revinfo.p1, revinfo.p2,
                                                          revinfo.flags, revinfo.node)
                     return fulltext

                 def _builddeltadiff(self, base, revinfo, fh):
                     """Return the delta turning revision ``base`` into the new text."""
                     revlog = self.revlog
                     t = self.buildtext(revinfo, fh)
                     if revlog.iscensored(base):
                         # deltas based on a censored revision must replace the
                         # full content in one patch, so delta works everywhere
                         header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
                         delta = header + t
                     else:
                         ptext = revlog.revision(base, _df=fh, raw=True)
                         delta = mdiff.textdiff(ptext, t)

                     return delta

                 def _builddeltainfo(self, revinfo, base, fh):
                     """Return the ``_deltainfo`` of a delta against revision ``base``."""
                     # can we use the cached delta?
                     if revinfo.cachedelta and revinfo.cachedelta[0] == base:
                         delta = revinfo.cachedelta[1]
                     else:
                         delta = self._builddeltadiff(base, revinfo, fh)
                     revlog = self.revlog
                     header, data = revlog.compress(delta)
                     deltalen = len(header) + len(data)
                     chainbase = revlog.chainbase(base)
                     # the new entry would be written right after the current tip
                     offset = revlog.end(len(revlog) - 1)
                     dist = deltalen + offset - revlog.start(chainbase)
                     if revlog._generaldelta:
                         deltabase = base
                     else:
                         deltabase = chainbase
                     chainlen, compresseddeltalen = revlog._chaininfo(base)
                     chainlen += 1
                     compresseddeltalen += deltalen

                     snapshotdepth = None
                     if deltabase == nullrev:
                         snapshotdepth = 0
                     elif revlog._sparserevlog and revlog.issnapshot(deltabase):
                         # A delta chain should always be one full snapshot,
                         # zero or more semi-snapshots, and zero or more deltas
                         p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
                         if deltabase not in (p1, p2):
                             snapshotdepth = len(revlog._deltachain(deltabase)[0])

                     return _deltainfo(dist, deltalen, (header, data), deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)

                 def _fullsnapshotinfo(self, fh, revinfo):
                     """Return the ``_deltainfo`` for storing the revision as full text."""
                     curr = len(self.revlog)
                     rawtext = self.buildtext(revinfo, fh)
                     data = self.revlog.compress(rawtext)
                     compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
                     # a full snapshot is its own base and the start of its own chain
                     deltabase = chainbase = curr
                     snapshotdepth = 0
                     chainlen = 1

                     return _deltainfo(dist, deltalen, data, deltabase,
                                       chainbase, chainlen, compresseddeltalen,
                                       snapshotdepth)

                 def finddeltainfo(self, revinfo, fh):
                     """Find an acceptable delta against a candidate revision

                     revinfo: information about the revision (instance of _revisioninfo)
                     fh:      file handle to either the .i or the .d revlog file,
                              depending on whether it is inlined or not

                     Candidate groups are tried in the order given by
                     _candidategroups; within the first group that contains an
                     acceptable delta, the smallest one is returned.

                     If no suitable deltabase is found, we return delta info for a full
                     snapshot.
                     """
                     if not revinfo.textlen:
                         return self._fullsnapshotinfo(fh, revinfo)

                     # no delta for flag processor revision (see "candelta" for why)
                     # not calling candelta since only one revision needs test, also to
                     # avoid overhead fetching flags again.
                     if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
                         return self._fullsnapshotinfo(fh, revinfo)

                     cachedelta = revinfo.cachedelta
                     p1 = revinfo.p1
                     p2 = revinfo.p2
                     revlog = self.revlog

                     deltalength = self.revlog.length
                     deltaparent = self.revlog.deltaparent

                     deltainfo = None
                     deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
                     # _candidategroups works on revision numbers, not on nodes
                     p1r, p2r = revlog.rev(p1), revlog.rev(p2)
                     groups = _candidategroups(self.revlog, p1r, p2r, cachedelta)
                     for candidaterevs in groups:
                         # filter out delta base that will never produce good delta
                         candidaterevs = [r for r in candidaterevs
                                          if self.revlog.length(r) <= deltas_limit]
                         nominateddeltas = []
                         for candidaterev in candidaterevs:
                             # skip over empty delta (no need to include them in a chain)
                             while candidaterev != nullrev and not deltalength(candidaterev):
                                 candidaterev = deltaparent(candidaterev)
                             # no need to try a delta against nullid, this will be handled
                             # by fulltext later.
                             if candidaterev == nullrev:
                                 continue
                             # no delta for rawtext-changing revs (see "candelta" for why)
                             if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                                 continue
                             candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                             if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                                 nominateddeltas.append(candidatedelta)
                         if nominateddeltas:
                             deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                             break

                     if deltainfo is None:
                         deltainfo = self._fullsnapshotinfo(fh, revinfo)
                     return deltainfo