##// END OF EJS Templates
revlog: document and test _slicechunk
Boris Feld -
r38659:f62b8fb0 default
parent child Browse files
Show More
@@ -1,2710 +1,2744 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import hashlib
19 import hashlib
20 import heapq
20 import heapq
21 import os
21 import os
22 import re
22 import re
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirfilenodeids,
32 wdirfilenodeids,
33 wdirhex,
33 wdirhex,
34 wdirid,
34 wdirid,
35 wdirrev,
35 wdirrev,
36 )
36 )
37 from .i18n import _
37 from .i18n import _
38 from .thirdparty import (
38 from .thirdparty import (
39 attr,
39 attr,
40 )
40 )
41 from . import (
41 from . import (
42 ancestor,
42 ancestor,
43 error,
43 error,
44 mdiff,
44 mdiff,
45 policy,
45 policy,
46 pycompat,
46 pycompat,
47 templatefilters,
47 templatefilters,
48 util,
48 util,
49 )
49 )
50 from .utils import (
50 from .utils import (
51 stringutil,
51 stringutil,
52 )
52 )
53
53
54 parsers = policy.importmod(r'parsers')
54 parsers = policy.importmod(r'parsers')
55
55
56 # Aliased for performance.
56 # Aliased for performance.
57 _zlibdecompress = zlib.decompress
57 _zlibdecompress = zlib.decompress
58
58
59 # revlog header flags
59 # revlog header flags
60 REVLOGV0 = 0
60 REVLOGV0 = 0
61 REVLOGV1 = 1
61 REVLOGV1 = 1
62 # Dummy value until file format is finalized.
62 # Dummy value until file format is finalized.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
63 # Reminder: change the bounds check in revlog.__init__ when this is changed.
64 REVLOGV2 = 0xDEAD
64 REVLOGV2 = 0xDEAD
65 FLAG_INLINE_DATA = (1 << 16)
65 FLAG_INLINE_DATA = (1 << 16)
66 FLAG_GENERALDELTA = (1 << 17)
66 FLAG_GENERALDELTA = (1 << 17)
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
67 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
68 REVLOG_DEFAULT_FORMAT = REVLOGV1
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
69 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
70 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
71 REVLOGV2_FLAGS = REVLOGV1_FLAGS
72
72
73 # revlog index flags
73 # revlog index flags
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
74 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
75 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
76 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
77 REVIDX_DEFAULT_FLAGS = 0
77 REVIDX_DEFAULT_FLAGS = 0
78 # stable order in which flags need to be processed and their processors applied
78 # stable order in which flags need to be processed and their processors applied
79 REVIDX_FLAGS_ORDER = [
79 REVIDX_FLAGS_ORDER = [
80 REVIDX_ISCENSORED,
80 REVIDX_ISCENSORED,
81 REVIDX_ELLIPSIS,
81 REVIDX_ELLIPSIS,
82 REVIDX_EXTSTORED,
82 REVIDX_EXTSTORED,
83 ]
83 ]
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
84 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
85 # bitmark for flags that could cause rawdata content change
85 # bitmark for flags that could cause rawdata content change
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
86 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
87
87
88 # max size of revlog with inline data
88 # max size of revlog with inline data
89 _maxinline = 131072
89 _maxinline = 131072
90 _chunksize = 1048576
90 _chunksize = 1048576
91
91
92 RevlogError = error.RevlogError
92 RevlogError = error.RevlogError
93 LookupError = error.LookupError
93 LookupError = error.LookupError
94 CensoredNodeError = error.CensoredNodeError
94 CensoredNodeError = error.CensoredNodeError
95 ProgrammingError = error.ProgrammingError
95 ProgrammingError = error.ProgrammingError
96
96
97 # Store flag processors (cf. 'addflagprocessor()' to register)
97 # Store flag processors (cf. 'addflagprocessor()' to register)
98 _flagprocessors = {
98 _flagprocessors = {
99 REVIDX_ISCENSORED: None,
99 REVIDX_ISCENSORED: None,
100 }
100 }
101
101
_mdre = re.compile('\1\n')
def parsemeta(text):
    """Parse filelog metadata out of text; return (metadatadict, metadatasize).

    Returns (None, None) when text carries no metadata block.
    """
    # text can be a buffer, so avoid .startswith or .index
    if text[:2] != '\1\n':
        return None, None
    end = _mdre.search(text, 2).start()
    meta = {}
    for line in text[2:end].splitlines():
        key, value = line.split(": ", 1)
        meta[key] = value
    return meta, end + 2
115
115
def packmeta(meta, text):
    """Serialize meta (sorted by key) in front of text, \\1\\n delimited."""
    lines = ["%s: %s\n" % (key, meta[key]) for key in sorted(meta)]
    return "\1\n%s\1\n%s" % ("".join(lines), text)
120
120
def _censoredtext(text):
    """Report whether text's metadata block marks it as censored."""
    meta = parsemeta(text)[0]
    return meta and "censored" in meta
124
124
def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
      - (read)  f(self, rawtext) -> text, bool
      - (write) f(self, text) -> rawtext, bool
      - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user, "rawtext" is what is stored in revlog
    data and is not directly visible to the user.
    The boolean returned by these transforms tells whether the returned
    text may be used for hash integrity checking: if "write" returns False,
    "text" is used to generate the hash; if it returns True, the "rawtext"
    it produced is used instead. "write" and "read" usually return opposite
    booleans, and "raw" returns the same boolean as "write".

    Note: the 'raw' transform is used for changegroup generation and in some
    debug commands; there it only indicates whether the contents can be used
    for hash integrity checks.
    """
    # Reject flags outside the supported set, flags missing from the
    # processing order, and double registration -- in that priority.
    if not flag & REVIDX_KNOWN_FLAGS:
        raise ProgrammingError(
            _("cannot register processor on unknown flag '%#x'.") % (flag))
    if flag not in REVIDX_FLAGS_ORDER:
        raise ProgrammingError(
            _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag))
    if flag in _flagprocessors:
        raise error.Abort(
            _("cannot register multiple processors on flag '%#x'.") % (flag))
    _flagprocessors[flag] = processor
160
160
def getoffset(q):
    # An index entry packs the data-file byte offset in the bits above the
    # 16 flag bits; shift the flags out to recover the offset.
    return int(q >> 16)
163
163
def gettype(q):
    # The low 16 bits of an index entry hold the revision flags
    # (see the REVIDX_* constants above).
    return int(q & 0xFFFF)
166
166
def offset_type(offset, type):
    # Pack a byte offset and revision flags into one integer; inverse of
    # getoffset()/gettype(). Flag bits with no registered meaning are
    # rejected so they cannot silently end up in the index.
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
171
171
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # copying the precomputed null-parent hash is faster than
        # creating a fresh one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # neither parent is null: feed the parents in sorted order so the
        # result is independent of p1/p2 ordering
        lo, hi = (p1, p2) if p1 < p2 else (p2, p1)
        s = hashlib.sha1(lo)
        s.update(hi)
    s.update(text)
    return s.digest()
198
198
199 class _testrevlog(object):
199 class _testrevlog(object):
200 """minimalist fake revlog to use in doctests"""
200 """minimalist fake revlog to use in doctests"""
201
201
202 def __init__(self, data, density=0.5, mingap=0):
202 def __init__(self, data, density=0.5, mingap=0):
203 """data is an list of revision payload boundaries"""
203 """data is an list of revision payload boundaries"""
204 self._data = data
204 self._data = data
205 self._srdensitythreshold = density
205 self._srdensitythreshold = density
206 self._srmingapsize = mingap
206 self._srmingapsize = mingap
207
207
208 def start(self, rev):
208 def start(self, rev):
209 if rev == 0:
209 if rev == 0:
210 return 0
210 return 0
211 return self._data[rev - 1]
211 return self._data[rev - 1]
212
212
213 def end(self, rev):
213 def end(self, rev):
214 return self._data[rev]
214 return self._data[rev]
215
215
216 def length(self, rev):
216 def length(self, rev):
217 return self.end(rev) - self.start(rev)
217 return self.end(rev) - self.start(rev)
218
218
219 def _trimchunk(revlog, revs, startidx, endidx=None):
219 def _trimchunk(revlog, revs, startidx, endidx=None):
220 """returns revs[startidx:endidx] without empty trailing revs
220 """returns revs[startidx:endidx] without empty trailing revs
221
221
222 Doctest Setup
222 Doctest Setup
223 >>> revlog = _testrevlog([
223 >>> revlog = _testrevlog([
224 ... 5, #0
224 ... 5, #0
225 ... 10, #1
225 ... 10, #1
226 ... 12, #2
226 ... 12, #2
227 ... 12, #3 (empty)
227 ... 12, #3 (empty)
228 ... 17, #4
228 ... 17, #4
229 ... 21, #5
229 ... 21, #5
230 ... 21, #6 (empty)
230 ... 21, #6 (empty)
231 ... ])
231 ... ])
232
232
233 Contiguous cases:
233 Contiguous cases:
234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
234 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
235 [0, 1, 2, 3, 4, 5]
235 [0, 1, 2, 3, 4, 5]
236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
236 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
237 [0, 1, 2, 3, 4]
237 [0, 1, 2, 3, 4]
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
238 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
239 [0, 1, 2]
239 [0, 1, 2]
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
240 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
241 [2]
241 [2]
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
242 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
243 [3, 4, 5]
243 [3, 4, 5]
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
244 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
245 [3, 4]
245 [3, 4]
246
246
247 Discontiguous cases:
247 Discontiguous cases:
248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
248 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
249 [1, 3, 5]
249 [1, 3, 5]
250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
250 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
251 [1]
251 [1]
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
252 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
253 [3, 5]
253 [3, 5]
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
254 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
255 [3, 5]
255 [3, 5]
256 """
256 """
257 length = revlog.length
257 length = revlog.length
258
258
259 if endidx is None:
259 if endidx is None:
260 endidx = len(revs)
260 endidx = len(revs)
261
261
262 # Trim empty revs at the end, but never the very first revision of a chain
262 # Trim empty revs at the end, but never the very first revision of a chain
263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
263 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
264 endidx -= 1
264 endidx -= 1
265
265
266 return revs[startidx:endidx]
266 return revs[startidx:endidx]
267
267
268 def _segmentspan(revlog, revs):
268 def _segmentspan(revlog, revs):
269 """Get the byte span of a segment of revisions
269 """Get the byte span of a segment of revisions
270
270
271 revs is a sorted array of revision numbers
271 revs is a sorted array of revision numbers
272
272
273 >>> revlog = _testrevlog([
273 >>> revlog = _testrevlog([
274 ... 5, #0
274 ... 5, #0
275 ... 10, #1
275 ... 10, #1
276 ... 12, #2
276 ... 12, #2
277 ... 12, #3 (empty)
277 ... 12, #3 (empty)
278 ... 17, #4
278 ... 17, #4
279 ... ])
279 ... ])
280
280
281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
281 >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
282 17
282 17
283 >>> _segmentspan(revlog, [0, 4])
283 >>> _segmentspan(revlog, [0, 4])
284 17
284 17
285 >>> _segmentspan(revlog, [3, 4])
285 >>> _segmentspan(revlog, [3, 4])
286 5
286 5
287 >>> _segmentspan(revlog, [1, 2, 3,])
287 >>> _segmentspan(revlog, [1, 2, 3,])
288 7
288 7
289 >>> _segmentspan(revlog, [1, 3])
289 >>> _segmentspan(revlog, [1, 3])
290 7
290 7
291 """
291 """
292 if not revs:
292 if not revs:
293 return 0
293 return 0
294 return revlog.end(revs[-1]) - revlog.start(revs[0])
294 return revlog.end(revs[-1]) - revlog.start(revs[0])
295
295
def _slicechunk(revlog, revs):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    This is a generator: each yielded value is a non-empty list of revisions
    (a sub-sequence of ``revs``).

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunk(revlog, range(16)))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced any further
    if len(revs) <= 1:
        yield revs
        return

    # total bytes read if the whole span is fetched in one go; readdata
    # shrinks below as gaps get skipped
    readdata = deltachainspan = _segmentspan(revlog, revs)

    # spans below the minimum gap size are not worth slicing at all
    if deltachainspan < revlog._srmingapsize:
        yield revs
        return

    # bytes of actual payload (useful data) within the span
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough already: read everything in one chunk
    if density >= revlog._srdensitythreshold:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gapsheap = []
    heapq.heapify(gapsheap)
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > revlog._srmingapsize:
                # negate so the heap (a min-heap) pops the largest gap first
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    heapq.heapify(indicesheap)
    while gapsheap and density < revlog._srdensitythreshold:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices; indicesheap yields the cut points
    # in increasing order
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # trailing chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
376
410
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # Immutable record describing a candidate delta, built by
    # _deltacomputer._builddeltainfo.
    # byte distance from the end of the revlog to the start of the chain
    # base (deltalen + offset - start(chainbase) in _builddeltainfo)
    distance = attr.ib()
    # compressed size of the delta (len(header) + len(data))
    deltalen = attr.ib()
    # (header, compressed payload) pair as returned by revlog.compress()
    data = attr.ib()
    # delta base: the base revision itself with generaldelta, otherwise the
    # chain base
    base = attr.ib()
    # first revision of the delta chain that ``base`` belongs to
    chainbase = attr.ib()
    # length of the delta chain including this new delta
    chainlen = attr.ib()
    # cumulative compressed size of the whole chain including this delta
    compresseddeltalen = attr.ib()
386
420
387 class _deltacomputer(object):
421 class _deltacomputer(object):
    def __init__(self, revlog):
        # the revlog deltas are computed against; every candidate lookup and
        # delta build below goes through it
        self.revlog = revlog
390
424
    def _getcandidaterevs(self, p1, p2, cachedelta):
        """
        Provides revisions that present an interest to be diffed against,
        grouped by level of easiness.

        Yields tuples/lists of candidate revision numbers, cheapest group
        first; each revision is yielded at most once. cachedelta is either
        None or a (baserev, delta) pair received from the wire.
        """
        revlog = self.revlog
        gdelta = revlog._generaldelta
        curr = len(revlog)
        prev = curr - 1
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        # should we try to build a delta?
        if prev != nullrev and revlog.storedeltachains:
            # track already-yielded revisions so nothing is offered twice
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and gdelta and revlog._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                yield (cachedelta[0],)
                tested.add(cachedelta[0])

            if gdelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]

                if not revlog._aggressivemergedeltas and len(parents) == 2:
                    parents.sort()
                    # To minimize the chance of having to build a fulltext,
                    # pick first whichever parent is closest to us (max rev)
                    yield (parents[1],)
                    # then the other one (min rev) if the first did not fit
                    yield (parents[0],)
                    tested.update(parents)
                elif len(parents) > 0:
                    # Test all parents (1 or 2), and keep the best candidate
                    yield parents
                    tested.update(parents)

            if prev not in tested:
                # other approach failed try against prev to hopefully save us a
                # fulltext.
                yield (prev,)
                tested.add(prev)
438
472
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the fulltext; the result is also cached in revinfo.btext[0].
        Raises RevlogError when the data claims censorship that the flags do
        not carry, and re-raises CensoredNodeError unless the censored flag
        is set.
        """
        # btext is a one-element cache list shared through revinfo
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        flags = revinfo.flags
        node = revinfo.node

        baserev = cachedelta[0]
        delta = cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                                   len(delta) - hlen):
            btext[0] = delta[hlen:]
        else:
            # deltabase is rawtext before changed by flag processors, which is
            # equivalent to non-raw text
            basetext = revlog.revision(baserev, _df=fh, raw=False)
            btext[0] = mdiff.patch(basetext, delta)

        try:
            # run registered flag processors and, when requested, verify the
            # node hash against the reconstructed text
            res = revlog._processflags(btext[0], flags, 'read', raw=True)
            btext[0], validatehash = res
            if validatehash:
                revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return btext[0]
482
516
483 def _builddeltadiff(self, base, revinfo, fh):
517 def _builddeltadiff(self, base, revinfo, fh):
484 revlog = self.revlog
518 revlog = self.revlog
485 t = self.buildtext(revinfo, fh)
519 t = self.buildtext(revinfo, fh)
486 if revlog.iscensored(base):
520 if revlog.iscensored(base):
487 # deltas based on a censored revision must replace the
521 # deltas based on a censored revision must replace the
488 # full content in one patch, so delta works everywhere
522 # full content in one patch, so delta works everywhere
489 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
523 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
490 delta = header + t
524 delta = header + t
491 else:
525 else:
492 ptext = revlog.revision(base, _df=fh, raw=True)
526 ptext = revlog.revision(base, _df=fh, raw=True)
493 delta = mdiff.textdiff(ptext, t)
527 delta = mdiff.textdiff(ptext, t)
494
528
495 return delta
529 return delta
496
530
497 def _builddeltainfo(self, revinfo, base, fh):
531 def _builddeltainfo(self, revinfo, base, fh):
498 # can we use the cached delta?
532 # can we use the cached delta?
499 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
533 if revinfo.cachedelta and revinfo.cachedelta[0] == base:
500 delta = revinfo.cachedelta[1]
534 delta = revinfo.cachedelta[1]
501 else:
535 else:
502 delta = self._builddeltadiff(base, revinfo, fh)
536 delta = self._builddeltadiff(base, revinfo, fh)
503 revlog = self.revlog
537 revlog = self.revlog
504 header, data = revlog.compress(delta)
538 header, data = revlog.compress(delta)
505 deltalen = len(header) + len(data)
539 deltalen = len(header) + len(data)
506 chainbase = revlog.chainbase(base)
540 chainbase = revlog.chainbase(base)
507 offset = revlog.end(len(revlog) - 1)
541 offset = revlog.end(len(revlog) - 1)
508 dist = deltalen + offset - revlog.start(chainbase)
542 dist = deltalen + offset - revlog.start(chainbase)
509 if revlog._generaldelta:
543 if revlog._generaldelta:
510 deltabase = base
544 deltabase = base
511 else:
545 else:
512 deltabase = chainbase
546 deltabase = chainbase
513 chainlen, compresseddeltalen = revlog._chaininfo(base)
547 chainlen, compresseddeltalen = revlog._chaininfo(base)
514 chainlen += 1
548 chainlen += 1
515 compresseddeltalen += deltalen
549 compresseddeltalen += deltalen
516 return _deltainfo(dist, deltalen, (header, data), deltabase,
550 return _deltainfo(dist, deltalen, (header, data), deltabase,
517 chainbase, chainlen, compresseddeltalen)
551 chainbase, chainlen, compresseddeltalen)
518
552
519 def finddeltainfo(self, revinfo, fh):
553 def finddeltainfo(self, revinfo, fh):
520 """Find an acceptable delta against a candidate revision
554 """Find an acceptable delta against a candidate revision
521
555
522 revinfo: information about the revision (instance of _revisioninfo)
556 revinfo: information about the revision (instance of _revisioninfo)
523 fh: file handle to either the .i or the .d revlog file,
557 fh: file handle to either the .i or the .d revlog file,
524 depending on whether it is inlined or not
558 depending on whether it is inlined or not
525
559
526 Returns the first acceptable candidate revision, as ordered by
560 Returns the first acceptable candidate revision, as ordered by
527 _getcandidaterevs
561 _getcandidaterevs
528 """
562 """
529 cachedelta = revinfo.cachedelta
563 cachedelta = revinfo.cachedelta
530 p1 = revinfo.p1
564 p1 = revinfo.p1
531 p2 = revinfo.p2
565 p2 = revinfo.p2
532 revlog = self.revlog
566 revlog = self.revlog
533
567
534 deltainfo = None
568 deltainfo = None
535 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
569 for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
536 nominateddeltas = []
570 nominateddeltas = []
537 for candidaterev in candidaterevs:
571 for candidaterev in candidaterevs:
538 # no delta for rawtext-changing revs (see "candelta" for why)
572 # no delta for rawtext-changing revs (see "candelta" for why)
539 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
573 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
540 continue
574 continue
541 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
575 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
542 if revlog._isgooddeltainfo(candidatedelta, revinfo):
576 if revlog._isgooddeltainfo(candidatedelta, revinfo):
543 nominateddeltas.append(candidatedelta)
577 nominateddeltas.append(candidatedelta)
544 if nominateddeltas:
578 if nominateddeltas:
545 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
579 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
546 break
580 break
547
581
548 return deltainfo
582 return deltainfo
549
583
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    textlen: length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
568
602
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
# bind the pack/unpack methods once; they are called in tight loops
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
580
614
class revlogoldio(object):
    """Reader/writer for the legacy version-0 revlog index format."""

    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data`` into (index, nodemap, None)."""
        entrysize = self.size
        datalen = len(data)
        index = []
        nodemap = {nullid: nullrev}
        offset = 0
        rev = 0
        while offset + entrysize <= datalen:
            e = indexformatv0_unpack(data[offset:offset + entrysize])
            offset += entrysize
            # transform to revlogv1 format
            index.append((offset_type(e[0], 0), e[1], -1, e[2], e[3],
                          nodemap.get(e[4], nullrev),
                          nodemap.get(e[5], nullrev), e[6]))
            nodemap[e[6]] = rev
            rev += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize a v1-shaped index ``entry`` into the v0 wire format."""
        # v0 has no room for flag bits in the offset field
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        fields = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                  node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*fields)
613
647
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
# the version/flags header stored in the first 4 bytes of the index file
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
633
667
class revlogio(object):
    """Reader/writer for the current (v1/v2) revlog index format."""

    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """Parse raw index ``data`` into (index, nodemap, chunkcache)."""
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        nodemap = getattr(index, 'nodemap', None)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        """Serialize an index ``entry``; rev 0 also carries the header."""
        packed = indexformatng_pack(*entry)
        if rev == 0:
            # the first entry doubles as the revlog header: its leading
            # 4 bytes hold the version/flags word instead of the offset
            packed = versionformat_pack(version) + packed[4:]
        return packed
648
682
649 class revlog(object):
683 class revlog(object):
650 """
684 """
651 the underlying revision storage object
685 the underlying revision storage object
652
686
653 A revlog consists of two parts, an index and the revision data.
687 A revlog consists of two parts, an index and the revision data.
654
688
655 The index is a file with a fixed record size containing
689 The index is a file with a fixed record size containing
656 information on each revision, including its nodeid (hash), the
690 information on each revision, including its nodeid (hash), the
657 nodeids of its parents, the position and offset of its data within
691 nodeids of its parents, the position and offset of its data within
658 the data file, and the revision it's based on. Finally, each entry
692 the data file, and the revision it's based on. Finally, each entry
659 contains a linkrev entry that can serve as a pointer to external
693 contains a linkrev entry that can serve as a pointer to external
660 data.
694 data.
661
695
662 The revision data itself is a linear collection of data chunks.
696 The revision data itself is a linear collection of data chunks.
663 Each chunk represents a revision and is usually represented as a
697 Each chunk represents a revision and is usually represented as a
664 delta against the previous chunk. To bound lookup time, runs of
698 delta against the previous chunk. To bound lookup time, runs of
665 deltas are limited to about 2 times the length of the original
699 deltas are limited to about 2 times the length of the original
666 version data. This makes retrieval of a version proportional to
700 version data. This makes retrieval of a version proportional to
667 its size, or O(1) relative to the number of revisions.
701 its size, or O(1) relative to the number of revisions.
668
702
669 Both pieces of the revlog are written to in an append-only
703 Both pieces of the revlog are written to in an append-only
670 fashion, which means we never need to rewrite a file to insert or
704 fashion, which means we never need to rewrite a file to insert or
671 remove data, and can use some simple techniques to avoid the need
705 remove data, and can use some simple techniques to avoid the need
672 for locking while reading.
706 for locking while reading.
673
707
674 If checkambig, indexfile is opened with checkambig=True at
708 If checkambig, indexfile is opened with checkambig=True at
675 writing, to avoid file stat ambiguity.
709 writing, to avoid file stat ambiguity.
676
710
677 If mmaplargeindex is True, and an mmapindexthreshold is set, the
711 If mmaplargeindex is True, and an mmapindexthreshold is set, the
678 index will be mmapped rather than read if it is larger than the
712 index will be mmapped rather than read if it is larger than the
679 configured threshold.
713 configured threshold.
680
714
681 If censorable is True, the revlog can have censored revisions.
715 If censorable is True, the revlog can have censored revisions.
682 """
716 """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        # data file defaults to the index name with ".i" swapped for ".d"
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # The defaults above may be overridden by options carried on the
        # opener (typically set from the repository configuration).
        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        # the chunk cache size must be a positive power of two (the caching
        # code relies on power-of-two arithmetic)
        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        # Read the existing index file, if any; a missing file just means an
        # empty revlog.  Large indexes may be mmapped instead of read.
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                # the on-disk header wins over the configured default version
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        # the header word packs a 16-bit format number and 16 bits of flags
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

        self.storedeltachains = True

        # pick the index parser matching the detected on-disk format
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
816
850
    @util.propertycache
    def _compressor(self):
        # Lazily instantiate the compressor for the configured engine name.
        return util.compengines[self._compengine].revlogcompressor()
820
854
821 def _indexfp(self, mode='r'):
855 def _indexfp(self, mode='r'):
822 """file object for the revlog's index file"""
856 """file object for the revlog's index file"""
823 args = {r'mode': mode}
857 args = {r'mode': mode}
824 if mode != 'r':
858 if mode != 'r':
825 args[r'checkambig'] = self._checkambig
859 args[r'checkambig'] = self._checkambig
826 if mode == 'w':
860 if mode == 'w':
827 args[r'atomictemp'] = True
861 args[r'atomictemp'] = True
828 return self.opener(self.indexfile, **args)
862 return self.opener(self.indexfile, **args)
829
863
    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)
833
867
834 @contextlib.contextmanager
868 @contextlib.contextmanager
835 def _datareadfp(self, existingfp=None):
869 def _datareadfp(self, existingfp=None):
836 """file object suitable to read data"""
870 """file object suitable to read data"""
837 if existingfp is not None:
871 if existingfp is not None:
838 yield existingfp
872 yield existingfp
839 else:
873 else:
840 if self._inline:
874 if self._inline:
841 func = self._indexfp
875 func = self._indexfp
842 else:
876 else:
843 func = self._datafp
877 func = self._datafp
844 with func() as fp:
878 with func() as fp:
845 yield fp
879 yield fp
846
880
    def tip(self):
        # -2 because the index keeps a trailing null-revision sentinel entry.
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        # a revision number is valid if it indexes a real (non-null) entry
        return 0 <= rev < len(self)
    def __len__(self):
        # number of real revisions; excludes the trailing null sentinel
        return len(self.index) - 1
    def __iter__(self):
        # iterate revision numbers in ascending order
        return iter(xrange(len(self)))
855 def revs(self, start=0, stop=None):
889 def revs(self, start=0, stop=None):
856 """iterate over all rev in this revlog (from start to stop)"""
890 """iterate over all rev in this revlog (from start to stop)"""
857 step = 1
891 step = 1
858 if stop is not None:
892 if stop is not None:
859 if start > stop:
893 if start > stop:
860 step = -1
894 step = -1
861 stop += step
895 stop += step
862 else:
896 else:
863 stop = len(self)
897 stop = len(self)
864 return xrange(start, stop, step)
898 return xrange(start, stop, step)
865
899
    @util.propertycache
    def nodemap(self):
        # Resolving one node primes self._nodecache; expose that mapping as
        # the node -> rev map from then on.
        self.rev(self.node(0))
        return self._nodecache
870
904
871 def hasnode(self, node):
905 def hasnode(self, node):
872 try:
906 try:
873 self.rev(node)
907 self.rev(node)
874 return True
908 return True
875 except KeyError:
909 except KeyError:
876 return False
910 return False
877
911
    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True
889
923
    def clearcaches(self):
        """Drop the in-memory revision, chain, chunk and node-lookup caches."""
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            # a plain dict has no clearcaches(); reset it to its initial state
            self._nodecache = {nullid: nullrev}
            self._nodepos = None
901
935
    def rev(self, node):
        """Return the revision number for ``node``.

        Raises LookupError if the node is unknown, or error.WdirUnsupported
        for the working-directory pseudo nodes.
        """
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                # start scanning from the last real entry (skip the null
                # sentinel at the end of the index)
                p = len(i) - 2
            else:
                assert p < len(i)
            # scan backwards from the last unscanned position, caching every
            # node seen along the way so later lookups get cheaper
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
930
964
931 # Accessors for index entries.
965 # Accessors for index entries.
932
966
933 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
967 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
934 # are flags.
968 # are flags.
    def start(self, rev):
        # high bits of the first index field are the data-file offset
        return int(self.index[rev][0] >> 16)
937
971
    def flags(self, rev):
        # low 16 bits of the first index field are the per-revision flags
        return self.index[rev][0] & 0xFFFF
940
974
    def length(self, rev):
        # compressed length of the revision's data chunk (index field 1)
        return self.index[rev][1]
943
977
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        # a negative stored length means unknown: rebuild the raw text
        t = self.revision(rev, raw=True)
        return len(t)
952
986
    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        # slow path: materialize the processed text and measure it
        return len(self.revision(rev, raw=False))
962
996
963 def chainbase(self, rev):
997 def chainbase(self, rev):
964 base = self._chainbasecache.get(rev)
998 base = self._chainbasecache.get(rev)
965 if base is not None:
999 if base is not None:
966 return base
1000 return base
967
1001
968 index = self.index
1002 index = self.index
969 iterrev = rev
1003 iterrev = rev
970 base = index[iterrev][3]
1004 base = index[iterrev][3]
971 while base != iterrev:
1005 while base != iterrev:
972 iterrev = base
1006 iterrev = base
973 base = index[iterrev][3]
1007 base = index[iterrev][3]
974
1008
975 self._chainbasecache[rev] = base
1009 self._chainbasecache[rev] = base
976 return base
1010 return base
977
1011
    def linkrev(self, rev):
        # linkrev (index field 4): pointer into external data, see the
        # class docstring
        return self.index[rev][4]
980
1014
    def parentrevs(self, rev):
        """Return the two parent revisions of ``rev`` as a (p1, p2) tuple."""
        try:
            entry = self.index[rev]
        except IndexError:
            # the working-directory pseudo revision has no index entry
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]
990
1024
    def node(self, rev):
        """Return the node (hash) of revision ``rev``."""
        try:
            return self.index[rev][7]
        except IndexError:
            # the working-directory pseudo revision has no index entry
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
998
1032
999 # Derived from index values.
1033 # Derived from index values.
1000
1034
    def end(self, rev):
        # offset just past the end of rev's data: start + compressed length
        return self.start(rev) + self.length(rev)
1003
1037
    def parents(self, node):
        """Return the parent nodes of ``node``."""
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
1008
1042
    def chainlen(self, rev):
        # length component of the (chain-length, sum-delta-length) pair
        return self._chaininfo(rev)[0]
1011
1045
    def _chaininfo(self, rev):
        """Return (chain-length, sum-of-compressed-delta-lengths) for ``rev``."""
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        # walk the delta chain towards its base (a revision that is its own
        # base), accumulating the chain length and total compressed size
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                # generaldelta: the delta base is stored in the index
                iterrev = e[3]
            else:
                # legacy format: deltas always chain to the previous rev
                iterrev -= 1
            if iterrev in chaininfocache:
                # reuse a previously computed tail of this chain
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
1042
1076
1043 def _deltachain(self, rev, stoprev=None):
1077 def _deltachain(self, rev, stoprev=None):
1044 """Obtain the delta chain for a revision.
1078 """Obtain the delta chain for a revision.
1045
1079
1046 ``stoprev`` specifies a revision to stop at. If not specified, we
1080 ``stoprev`` specifies a revision to stop at. If not specified, we
1047 stop at the base of the chain.
1081 stop at the base of the chain.
1048
1082
1049 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1083 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1050 revs in ascending order and ``stopped`` is a bool indicating whether
1084 revs in ascending order and ``stopped`` is a bool indicating whether
1051 ``stoprev`` was hit.
1085 ``stoprev`` was hit.
1052 """
1086 """
1053 # Try C implementation.
1087 # Try C implementation.
1054 try:
1088 try:
1055 return self.index.deltachain(rev, stoprev, self._generaldelta)
1089 return self.index.deltachain(rev, stoprev, self._generaldelta)
1056 except AttributeError:
1090 except AttributeError:
1057 pass
1091 pass
1058
1092
1059 chain = []
1093 chain = []
1060
1094
1061 # Alias to prevent attribute lookup in tight loop.
1095 # Alias to prevent attribute lookup in tight loop.
1062 index = self.index
1096 index = self.index
1063 generaldelta = self._generaldelta
1097 generaldelta = self._generaldelta
1064
1098
1065 iterrev = rev
1099 iterrev = rev
1066 e = index[iterrev]
1100 e = index[iterrev]
1067 while iterrev != e[3] and iterrev != stoprev:
1101 while iterrev != e[3] and iterrev != stoprev:
1068 chain.append(iterrev)
1102 chain.append(iterrev)
1069 if generaldelta:
1103 if generaldelta:
1070 iterrev = e[3]
1104 iterrev = e[3]
1071 else:
1105 else:
1072 iterrev -= 1
1106 iterrev -= 1
1073 e = index[iterrev]
1107 e = index[iterrev]
1074
1108
1075 if iterrev == stoprev:
1109 if iterrev == stoprev:
1076 stopped = True
1110 stopped = True
1077 else:
1111 else:
1078 chain.append(iterrev)
1112 chain.append(iterrev)
1079 stopped = False
1113 stopped = False
1080
1114
1081 chain.reverse()
1115 chain.reverse()
1082 return chain, stopped
1116 return chain, stopped
1083
1117
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse topological order.

    Revisions lower than ``stoprev`` are not generated.  See the
    documentation for ancestor.lazyancestors for the full contract.
    """
    pfunc = self.parentrevs
    return ancestor.lazyancestors(pfunc, revs, stoprev=stoprev,
                                  inclusive=inclusive)
1092
1126
def descendants(self, revs):
    """Generate the descendants of 'revs' in revision order.

    Yield a sequence of revision numbers starting with a child of
    some rev in revs; each revision is *not* considered a descendant
    of itself.  Results are ordered by revision number (a topological
    sort).
    """
    lowest = min(revs)
    if lowest == nullrev:
        # Everything descends from the null revision.
        for rev in self:
            yield rev
        return

    seen = set(revs)
    for rev in self.revs(start=lowest + 1):
        for parent in self.parentrevs(rev):
            if parent != nullrev and parent in seen:
                seen.add(rev)
                yield rev
                break
1113
1147
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # We want the ancestors of ``common`` *inclusively*: wrap the lazy
    # ancestor generator in a set-like object that members can be added to.
    class lazyset(object):
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # Walk up from each head, collecting every ancestor not in ``has``.
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        for p in self.parentrevs(r):
            if p not in has:
                visit.append(p)

    return has, [self.node(r) for r in sorted(missing)]
1177
1211
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]
    return ancestor.incrementalmissingancestors(self.parentrevs, common)
1191
1225
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullrev]
    if heads is None:
        heads = self.headrevs()

    missing = self.incrementalmissingrevs(common=common)
    return missing.missingancestors(heads)
1215
1249
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    # Work in revision-number space, then map the result back to nodes.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in inc.missingancestors(headrevs)]
1241
1275
def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min(self.rev(n) for n in roots)
    else:
        # Everybody is a descendant of nullid.
        roots = [nullid]
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return ([self.node(r) for r in self], [nullid], list(self.heads()))
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # ``ancestors = None`` signals that every node is an ancestor.
        ancestors = None
        # Heads will be discovered during the main loop below.
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Later it also filters out the heads we cannot reach from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max(self.rev(n) for n in nodestotag)
        while nodestotag:
            # Grab a node to tag.
            n = nodestotag.pop()
            # Never tag nullid.
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # Possibly a descendant of one of the roots, and not
                    # yet marked: mark it and queue its parents.
                    ancestors.add(n)
                    nodestotag.update(p for p in self.parents(n)
                                      if p != nullid)
                elif n in heads:
                    # Seen before and listed as a head: it is a 'fake'
                    # head, since real heads are not ancestors of other
                    # heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, remove any roots that
        # are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # Recompute the lowest rev so it excludes roots that are not
            # ancestors.

            # Filter out roots that aren't ancestors of heads.
            roots = [root for root in roots if root in ancestors]
            if roots:
                lowestrev = min(self.rev(root) for root in roots)
            else:
                # No more roots?  Return empty list.
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid.
            isdescendant = True
        elif n in descendants:
            # n is already a descendant.
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check whether it is a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents is a
            # descendant.  (The descendants set was seeded with the roots
            # above.)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots: mark this head as having been reached.
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously, its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    heads = [head for head, flag in heads.iteritems() if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)
1396
1430
def headrevs(self):
    """Return the revisions that have no children.

    Prefers the C index's native implementation, falling back to the
    pure-Python version when it is unavailable.
    """
    try:
        return self.index.headrevs()
    except AttributeError:
        return self._headrevs()
1402
1436
def computephases(self, roots):
    """Compute phase maps and sets for ``roots``.

    Delegates to the C index implementation.
    """
    return self.index.computephasesmapsets(roots)
1405
1439
1406 def _headrevs(self):
1440 def _headrevs(self):
1407 count = len(self)
1441 count = len(self)
1408 if not count:
1442 if not count:
1409 return [nullrev]
1443 return [nullrev]
1410 # we won't iter over filtered rev so nobody is a head at start
1444 # we won't iter over filtered rev so nobody is a head at start
1411 ishead = [0] * (count + 1)
1445 ishead = [0] * (count + 1)
1412 index = self.index
1446 index = self.index
1413 for r in self:
1447 for r in self:
1414 ishead[r] = 1 # I may be an head
1448 ishead[r] = 1 # I may be an head
1415 e = index[r]
1449 e = index[r]
1416 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1450 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1417 return [r for r, val in enumerate(ishead) if val]
1451 return [r for r, val in enumerate(ishead) if val]
1418
1452
def heads(self, start=None, stop=None):
    """Return the list of all nodes that have no children.

    If ``start`` is specified, only heads that are descendants of
    ``start`` will be returned.  If ``stop`` is specified, all the revs
    in it are treated as if they had no children.
    """
    if start is None and stop is None:
        if not len(self):
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    if start is None:
        start = nullid
    if stop is None:
        stop = []
    stoprevs = {self.rev(n) for n in stop}
    startrev = self.rev(start)
    reachable = {startrev}
    heads = {startrev}

    parentrevs = self.parentrevs
    for r in self.revs(start=startrev + 1):
        for p in parentrevs(r):
            if p in reachable:
                if r not in stoprevs:
                    # r descends from start: it is reachable and,
                    # until a child shows up, a head.
                    reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    # p has a reachable child, so it is not a head.
                    heads.remove(p)

    return [self.node(r) for r in heads]
1452
1486
def children(self, node):
    """Find the children of a given node; returns their node ids."""
    kids = []
    parentrev = self.rev(node)
    for r in self.revs(start=parentrev + 1):
        realparents = [p for p in self.parentrevs(r) if p != nullrev]
        if realparents:
            # One append per matching parent link: a revision linked
            # twice to the same parent is reported twice.
            for p in realparents:
                if p == parentrev:
                    kids.append(self.node(r))
        elif parentrev == nullrev:
            # Parentless revisions count as children of the null rev.
            kids.append(self.node(r))
    return kids
1466
1500
def descendant(self, start, end):
    """True if revision 'end' is a descendant of revision 'start'.

    A revision is considered a descendant of itself."""
    if start == nullrev:
        # Everything descends from the null revision.
        return True
    if start == end:
        return True
    return start in self._commonancestorsheads(start, end)
1476
1510
def commonancestorsheads(self, a, b):
    """Calculate all the heads of the common ancestors of nodes a and b."""
    reva = self.rev(a)
    revb = self.rev(b)
    ancs = self._commonancestorsheads(reva, revb)
    return pycompat.maplist(self.node, ancs)
1482
1516
1483 def _commonancestorsheads(self, *revs):
1517 def _commonancestorsheads(self, *revs):
1484 """calculate all the heads of the common ancestors of revs"""
1518 """calculate all the heads of the common ancestors of revs"""
1485 try:
1519 try:
1486 ancs = self.index.commonancestorsheads(*revs)
1520 ancs = self.index.commonancestorsheads(*revs)
1487 except (AttributeError, OverflowError): # C implementation failed
1521 except (AttributeError, OverflowError): # C implementation failed
1488 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1522 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1489 return ancs
1523 return ancs
1490
1524
def isancestor(self, a, b):
    """Return True if node a is an ancestor of node b.

    The implementation of this is trivial but the use of
    commonancestorsheads is not."""
    reva = self.rev(a)
    revb = self.rev(b)
    return self.descendant(reva, revb)
1498
1532
def ancestor(self, a, b):
    """Calculate the "best" common ancestor of nodes a and b."""
    a, b = self.rev(a), self.rev(b)
    try:
        ancs = self.index.ancestors(a, b)
    except (AttributeError, OverflowError):
        # Fall back to the pure-Python implementation.
        ancs = ancestor.ancestors(self.parentrevs, a, b)
    if not ancs:
        return nullid
    # Choose a consistent winner when there is a tie.
    return min(map(self.node, ancs))
1511
1545
1512 def _match(self, id):
1546 def _match(self, id):
1513 if isinstance(id, int):
1547 if isinstance(id, int):
1514 # rev
1548 # rev
1515 return self.node(id)
1549 return self.node(id)
1516 if len(id) == 20:
1550 if len(id) == 20:
1517 # possibly a binary node
1551 # possibly a binary node
1518 # odds of a binary node being all hex in ASCII are 1 in 10**25
1552 # odds of a binary node being all hex in ASCII are 1 in 10**25
1519 try:
1553 try:
1520 node = id
1554 node = id
1521 self.rev(node) # quick search the index
1555 self.rev(node) # quick search the index
1522 return node
1556 return node
1523 except LookupError:
1557 except LookupError:
1524 pass # may be partial hex id
1558 pass # may be partial hex id
1525 try:
1559 try:
1526 # str(rev)
1560 # str(rev)
1527 rev = int(id)
1561 rev = int(id)
1528 if "%d" % rev != id:
1562 if "%d" % rev != id:
1529 raise ValueError
1563 raise ValueError
1530 if rev < 0:
1564 if rev < 0:
1531 rev = len(self) + rev
1565 rev = len(self) + rev
1532 if rev < 0 or rev >= len(self):
1566 if rev < 0 or rev >= len(self):
1533 raise ValueError
1567 raise ValueError
1534 return self.node(rev)
1568 return self.node(rev)
1535 except (ValueError, OverflowError):
1569 except (ValueError, OverflowError):
1536 pass
1570 pass
1537 if len(id) == 40:
1571 if len(id) == 40:
1538 try:
1572 try:
1539 # a full hex nodeid?
1573 # a full hex nodeid?
1540 node = bin(id)
1574 node = bin(id)
1541 self.rev(node)
1575 self.rev(node)
1542 return node
1576 return node
1543 except (TypeError, LookupError):
1577 except (TypeError, LookupError):
1544 pass
1578 pass
1545
1579
def _partialmatch(self, id):
    """Resolve a hex-prefix ``id`` to a node, or None when nothing matches.

    Raises RevlogError/LookupError on ambiguity and error.WdirUnsupported
    when the prefix can only denote the working directory pseudo-node.
    """
    # we don't care wdirfilenodeids as they should be always full hash
    maybewdir = wdirhex.startswith(id)
    try:
        # radix-tree lookup provided by the C index
        partial = self.index.partialmatch(id)
        if partial and self.hasnode(partial):
            if maybewdir:
                # single 'ff...' match in radix tree, ambiguous with wdir
                raise RevlogError
            return partial
        if maybewdir:
            # no 'ff...' match in radix tree, wdir identified
            raise error.WdirUnsupported
        return None
    except RevlogError:
        # parsers.c radix tree lookup gave multiple matches
        # fast path: for unfiltered changelog, radix tree is accurate
        if not getattr(self, 'filteredrevs', None):
            raise LookupError(id, self.indexfile,
                              _('ambiguous identifier'))
        # fall through to slow path that filters hidden revisions
    except (AttributeError, ValueError):
        # we are pure python, or key was too short to search radix tree
        pass

    if id in self._pcache:
        return self._pcache[id]

    if len(id) <= 40:
        try:
            # hex(node)[:...]
            half = len(id) // 2 # grab an even number of digits
            binprefix = bin(id[:half * 2])
            # scan the whole index for nodes sharing the binary prefix,
            # then narrow to those whose hex form starts with ``id``
            candidates = [e[7] for e in self.index
                          if e[7].startswith(binprefix)]
            candidates = [n for n in candidates
                          if hex(n).startswith(id) and self.hasnode(n)]
            if len(candidates) > 0:
                if len(candidates) == 1 and not maybewdir:
                    self._pcache[id] = candidates[0]
                    return candidates[0]
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            if maybewdir:
                raise error.WdirUnsupported
            return None
        except TypeError:
            pass
1593
1627
def lookup(self, id):
    """locate a node based on:
    - revision number or str(revision number)
    - nodeid or subset of hex nodeid
    """
    # exact interpretations first, then hex-prefix matching
    exact = self._match(id)
    if exact is not None:
        return exact
    partial = self._partialmatch(id)
    if partial:
        return partial

    raise LookupError(id, self.indexfile, _('no match found'))
1607
1641
def shortest(self, node, minlength=1):
    """Find the shortest unambiguous prefix that matches node."""
    def isvalid(prefix):
        # A prefix is valid when it resolves to exactly one node.
        try:
            node = self._partialmatch(prefix)
        except error.RevlogError:
            return False
        except error.WdirUnsupported:
            # single 'ff...' match
            return True
        if node is None:
            raise LookupError(node, self.indexfile, _('no node'))
        return True

    def maybewdir(prefix):
        # all-'f' prefixes could also denote the wdir pseudo-node
        return all(c == 'f' for c in prefix)

    hexnode = hex(node)

    def disambiguate(hexnode, minlength):
        """Disambiguate against wdirid."""
        # Grow the prefix until it can no longer be confused with wdir.
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if not maybewdir(prefix):
                return prefix

    if not getattr(self, 'filteredrevs', None):
        try:
            # C index fast path: it knows the shortest unique length
            length = max(self.index.shortest(node), minlength)
            return disambiguate(hexnode, length)
        except RevlogError:
            if node != wdirid:
                raise LookupError(node, self.indexfile, _('no node'))
        except AttributeError:
            # Fall through to pure code
            pass

    if node == wdirid:
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return prefix

    for length in range(minlength, 41):
        prefix = hexnode[:length]
        if isvalid(prefix):
            return disambiguate(hexnode, length)
1655
1689
def cmp(self, node, text):
    """compare text with a given file revision

    returns True if text is different than what is stored.
    """
    # Hashing the candidate text with the stored parents is cheaper than
    # reconstructing the stored revision text.
    parent1, parent2 = self.parents(node)
    return hash(text, parent1, parent2) != node
1663
1697
1664 def _cachesegment(self, offset, data):
1698 def _cachesegment(self, offset, data):
1665 """Add a segment to the revlog cache.
1699 """Add a segment to the revlog cache.
1666
1700
1667 Accepts an absolute offset and the data that is at that location.
1701 Accepts an absolute offset and the data that is at that location.
1668 """
1702 """
1669 o, d = self._chunkcache
1703 o, d = self._chunkcache
1670 # try to add to existing cache
1704 # try to add to existing cache
1671 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1705 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1672 self._chunkcache = o, d + data
1706 self._chunkcache = o, d + data
1673 else:
1707 else:
1674 self._chunkcache = offset, data
1708 self._chunkcache = offset, data
1675
1709
def _readsegment(self, offset, length, df=None):
    """Load a segment of raw data from the revlog.

    Accepts an absolute offset, length to read, and an optional existing
    file handle to read from.

    If an existing file handle is passed, it will be seeked and the
    original seek position will NOT be restored.

    Returns a str or buffer of raw byte data.
    """
    # Cache data both forward and backward around the requested
    # data, in a fixed size window. This helps speed up operations
    # involving reading the revlog backwards.
    cachesize = self._chunkcachesize
    # round the start down and the end up to cachesize boundaries
    realoffset = offset & ~(cachesize - 1)
    realend = (offset + length + cachesize) & ~(cachesize - 1)
    reallength = realend - realoffset
    with self._datareadfp(df) as df:
        df.seek(realoffset)
        d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # hand back only the slice the caller asked for, zero-copy
            return util.buffer(d, offset - realoffset, length)
    return d
1701
1735
1702 def _getsegment(self, offset, length, df=None):
1736 def _getsegment(self, offset, length, df=None):
1703 """Obtain a segment of raw data from the revlog.
1737 """Obtain a segment of raw data from the revlog.
1704
1738
1705 Accepts an absolute offset, length of bytes to obtain, and an
1739 Accepts an absolute offset, length of bytes to obtain, and an
1706 optional file handle to the already-opened revlog. If the file
1740 optional file handle to the already-opened revlog. If the file
1707 handle is used, it's original seek position will not be preserved.
1741 handle is used, it's original seek position will not be preserved.
1708
1742
1709 Requests for data may be returned from a cache.
1743 Requests for data may be returned from a cache.
1710
1744
1711 Returns a str or a buffer instance of raw byte data.
1745 Returns a str or a buffer instance of raw byte data.
1712 """
1746 """
1713 o, d = self._chunkcache
1747 o, d = self._chunkcache
1714 l = len(d)
1748 l = len(d)
1715
1749
1716 # is it in the cache?
1750 # is it in the cache?
1717 cachestart = offset - o
1751 cachestart = offset - o
1718 cacheend = cachestart + length
1752 cacheend = cachestart + length
1719 if cachestart >= 0 and cacheend <= l:
1753 if cachestart >= 0 and cacheend <= l:
1720 if cachestart == 0 and cacheend == l:
1754 if cachestart == 0 and cacheend == l:
1721 return d # avoid a copy
1755 return d # avoid a copy
1722 return util.buffer(d, cachestart, cacheend - cachestart)
1756 return util.buffer(d, cachestart, cacheend - cachestart)
1723
1757
1724 return self._readsegment(offset, length, df=df)
1758 return self._readsegment(offset, length, df=df)
1725
1759
1726 def _getsegmentforrevs(self, startrev, endrev, df=None):
1760 def _getsegmentforrevs(self, startrev, endrev, df=None):
1727 """Obtain a segment of raw data corresponding to a range of revisions.
1761 """Obtain a segment of raw data corresponding to a range of revisions.
1728
1762
1729 Accepts the start and end revisions and an optional already-open
1763 Accepts the start and end revisions and an optional already-open
1730 file handle to be used for reading. If the file handle is read, its
1764 file handle to be used for reading. If the file handle is read, its
1731 seek position will not be preserved.
1765 seek position will not be preserved.
1732
1766
1733 Requests for data may be satisfied by a cache.
1767 Requests for data may be satisfied by a cache.
1734
1768
1735 Returns a 2-tuple of (offset, data) for the requested range of
1769 Returns a 2-tuple of (offset, data) for the requested range of
1736 revisions. Offset is the integer offset from the beginning of the
1770 revisions. Offset is the integer offset from the beginning of the
1737 revlog and data is a str or buffer of the raw byte data.
1771 revlog and data is a str or buffer of the raw byte data.
1738
1772
1739 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1773 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1740 to determine where each revision's data begins and ends.
1774 to determine where each revision's data begins and ends.
1741 """
1775 """
1742 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1776 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1743 # (functions are expensive).
1777 # (functions are expensive).
1744 index = self.index
1778 index = self.index
1745 istart = index[startrev]
1779 istart = index[startrev]
1746 start = int(istart[0] >> 16)
1780 start = int(istart[0] >> 16)
1747 if startrev == endrev:
1781 if startrev == endrev:
1748 end = start + istart[1]
1782 end = start + istart[1]
1749 else:
1783 else:
1750 iend = index[endrev]
1784 iend = index[endrev]
1751 end = int(iend[0] >> 16) + iend[1]
1785 end = int(iend[0] >> 16) + iend[1]
1752
1786
1753 if self._inline:
1787 if self._inline:
1754 start += (startrev + 1) * self._io.size
1788 start += (startrev + 1) * self._io.size
1755 end += (endrev + 1) * self._io.size
1789 end += (endrev + 1) * self._io.size
1756 length = end - start
1790 length = end - start
1757
1791
1758 return start, self._getsegment(start, length, df=df)
1792 return start, self._getsegment(start, length, df=df)
1759
1793
1760 def _chunk(self, rev, df=None):
1794 def _chunk(self, rev, df=None):
1761 """Obtain a single decompressed chunk for a revision.
1795 """Obtain a single decompressed chunk for a revision.
1762
1796
1763 Accepts an integer revision and an optional already-open file handle
1797 Accepts an integer revision and an optional already-open file handle
1764 to be used for reading. If used, the seek position of the file will not
1798 to be used for reading. If used, the seek position of the file will not
1765 be preserved.
1799 be preserved.
1766
1800
1767 Returns a str holding uncompressed data for the requested revision.
1801 Returns a str holding uncompressed data for the requested revision.
1768 """
1802 """
1769 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1803 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1770
1804
def _chunks(self, revs, df=None):
    """Obtain decompressed chunks for the specified revisions.

    Accepts an iterable of numeric revisions that are assumed to be in
    ascending order. Also accepts an optional already-open file handle
    to be used for reading. If used, the seek position of the file will
    not be preserved.

    This function is similar to calling ``self._chunk()`` multiple times,
    but is faster.

    Returns a list with decompressed data for each requested revision.
    """
    if not revs:
        return []
    # hoist hot attribute lookups out of the loops
    start = self.start
    length = self.length
    inline = self._inline
    iosize = self._io.size
    buffer = util.buffer

    chunks = []
    addchunk = chunks.append

    # With sparse reads enabled, split the revision run into densely
    # populated slices so we do not read large unused gaps from disk.
    if not self._withsparseread:
        slicedchunks = (revs,)
    else:
        slicedchunks = _slicechunk(self, revs)

    for revschunk in slicedchunks:
        firstrev = revschunk[0]
        # Skip trailing revisions with empty diff
        for lastrev in revschunk[::-1]:
            if length(lastrev) != 0:
                break

        try:
            offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revschunk]

        decomp = self.decompress
        for rev in revschunk:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            addchunk(decomp(buffer(data, chunkstart - offset, chunklength)))

    return chunks
1823
1857
1824 def _chunkclear(self):
1858 def _chunkclear(self):
1825 """Clear the raw chunk cache."""
1859 """Clear the raw chunk cache."""
1826 self._chunkcache = (0, '')
1860 self._chunkcache = (0, '')
1827
1861
def deltaparent(self, rev):
    """return deltaparent of the given revision"""
    base = self.index[rev][3]
    if base == rev:
        # a full snapshot has no delta parent
        return nullrev
    if self._generaldelta:
        # generaldelta stores the actual delta base in the index
        return base
    # legacy layout: deltas are always against the previous revision
    return rev - 1
1837
1871
def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    The delta calculated is in binary form and is intended to be written to
    revlog data directly. So this function needs raw revision data.
    """
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        # the stored delta for rev2 is already against rev1 — reuse it
        return bytes(self._chunk(rev2))

    # otherwise compute a fresh binary diff between the raw texts
    return mdiff.textdiff(self.revision(rev1, raw=True),
                          self.revision(rev2, raw=True))
1849
1883
def revision(self, nodeorrev, _df=None, raw=False):
    """return an uncompressed revision of a given node or revision
    number.

    _df - an existing file handle to read from. (internal-only)
    raw - an optional argument specifying if the revision data is to be
    treated as raw data when applying flag transforms. 'raw' should be set
    to True when generating changegroups or in debug commands.
    """
    # Accept either a revision number or a node id.
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    cachedrev = None
    flags = None
    rawtext = None
    if node == nullid:
        return ""
    if self._cache:
        if self._cache[0] == node:
            # _cache only stores rawtext
            if raw:
                return self._cache[2]
            # duplicated, but good for perf
            if rev is None:
                rev = self.rev(node)
            if flags is None:
                flags = self.flags(rev)
            # no extra flags set, no flag processor runs, text = rawtext
            if flags == REVIDX_DEFAULT_FLAGS:
                return self._cache[2]
            # rawtext is reusable. need to run flag processor
            rawtext = self._cache[2]

        cachedrev = self._cache[1]

    # look up what we need to read
    if rawtext is None:
        if rev is None:
            rev = self.rev(node)

        # walk the delta chain, stopping early if the cached revision
        # is part of it (its rawtext replaces the chain base)
        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            rawtext = self._cache[2]

        # drop cache to save memory
        self._cache = None

        deltas = self._chunks(chain, df=_df)
        if rawtext is None:
            # first chunk is the full-text base, the rest are deltas
            rawtext = bytes(deltas[0])
            deltas = deltas[1:]

        rawtext = mdiff.patches(rawtext, deltas)
        self._cache = (node, rev, rawtext)

    if flags is None:
        if rev is None:
            rev = self.rev(node)
        flags = self.flags(rev)

    # run registered flag processors (e.g. censorship, LFS) on the rawtext
    text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
    if validatehash:
        self.checkhash(text, node, rev=rev)

    return text
1919
1953
def hash(self, text, p1, p2):
    """Compute a node hash.

    Available as a function so that subclasses can replace the hash
    as needed.
    """
    # delegates to the module-level hash helper (not the builtin hash)
    return hash(text, p1, p2)
1927
1961
1928 def _processflags(self, text, flags, operation, raw=False):
1962 def _processflags(self, text, flags, operation, raw=False):
1929 """Inspect revision data flags and applies transforms defined by
1963 """Inspect revision data flags and applies transforms defined by
1930 registered flag processors.
1964 registered flag processors.
1931
1965
1932 ``text`` - the revision data to process
1966 ``text`` - the revision data to process
1933 ``flags`` - the revision flags
1967 ``flags`` - the revision flags
1934 ``operation`` - the operation being performed (read or write)
1968 ``operation`` - the operation being performed (read or write)
1935 ``raw`` - an optional argument describing if the raw transform should be
1969 ``raw`` - an optional argument describing if the raw transform should be
1936 applied.
1970 applied.
1937
1971
1938 This method processes the flags in the order (or reverse order if
1972 This method processes the flags in the order (or reverse order if
1939 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1973 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1940 flag processors registered for present flags. The order of flags defined
1974 flag processors registered for present flags. The order of flags defined
1941 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1975 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1942
1976
1943 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1977 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1944 processed text and ``validatehash`` is a bool indicating whether the
1978 processed text and ``validatehash`` is a bool indicating whether the
1945 returned text should be checked for hash integrity.
1979 returned text should be checked for hash integrity.
1946
1980
1947 Note: If the ``raw`` argument is set, it has precedence over the
1981 Note: If the ``raw`` argument is set, it has precedence over the
1948 operation and will only update the value of ``validatehash``.
1982 operation and will only update the value of ``validatehash``.
1949 """
1983 """
1950 # fast path: no flag processors will run
1984 # fast path: no flag processors will run
1951 if flags == 0:
1985 if flags == 0:
1952 return text, True
1986 return text, True
1953 if not operation in ('read', 'write'):
1987 if not operation in ('read', 'write'):
1954 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1988 raise ProgrammingError(_("invalid '%s' operation ") % (operation))
1955 # Check all flags are known.
1989 # Check all flags are known.
1956 if flags & ~REVIDX_KNOWN_FLAGS:
1990 if flags & ~REVIDX_KNOWN_FLAGS:
1957 raise RevlogError(_("incompatible revision flag '%#x'") %
1991 raise RevlogError(_("incompatible revision flag '%#x'") %
1958 (flags & ~REVIDX_KNOWN_FLAGS))
1992 (flags & ~REVIDX_KNOWN_FLAGS))
1959 validatehash = True
1993 validatehash = True
1960 # Depending on the operation (read or write), the order might be
1994 # Depending on the operation (read or write), the order might be
1961 # reversed due to non-commutative transforms.
1995 # reversed due to non-commutative transforms.
1962 orderedflags = REVIDX_FLAGS_ORDER
1996 orderedflags = REVIDX_FLAGS_ORDER
1963 if operation == 'write':
1997 if operation == 'write':
1964 orderedflags = reversed(orderedflags)
1998 orderedflags = reversed(orderedflags)
1965
1999
1966 for flag in orderedflags:
2000 for flag in orderedflags:
1967 # If a flagprocessor has been registered for a known flag, apply the
2001 # If a flagprocessor has been registered for a known flag, apply the
1968 # related operation transform and update result tuple.
2002 # related operation transform and update result tuple.
1969 if flag & flags:
2003 if flag & flags:
1970 vhash = True
2004 vhash = True
1971
2005
1972 if flag not in _flagprocessors:
2006 if flag not in _flagprocessors:
1973 message = _("missing processor for flag '%#x'") % (flag)
2007 message = _("missing processor for flag '%#x'") % (flag)
1974 raise RevlogError(message)
2008 raise RevlogError(message)
1975
2009
1976 processor = _flagprocessors[flag]
2010 processor = _flagprocessors[flag]
1977 if processor is not None:
2011 if processor is not None:
1978 readtransform, writetransform, rawtransform = processor
2012 readtransform, writetransform, rawtransform = processor
1979
2013
1980 if raw:
2014 if raw:
1981 vhash = rawtransform(self, text)
2015 vhash = rawtransform(self, text)
1982 elif operation == 'read':
2016 elif operation == 'read':
1983 text, vhash = readtransform(self, text)
2017 text, vhash = readtransform(self, text)
1984 else: # write operation
2018 else: # write operation
1985 text, vhash = writetransform(self, text)
2019 text, vhash = writetransform(self, text)
1986 validatehash = validatehash and vhash
2020 validatehash = validatehash and vhash
1987
2021
1988 return text, validatehash
2022 return text, validatehash
1989
2023
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Available as a function so that subclasses can extend hash mismatch
    behaviors as needed.
    """
    try:
        if p1 is None and p2 is None:
            # parents not supplied by the caller; look them up
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            # prefer reporting the revision number; fall back to a short
            # node when the caller did not supply one
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                % (self.indexfile, pycompat.bytestr(revornode)))
    except RevlogError:
        # a mismatch on a censorable revlog may just be censored content
        if self._censorable and _censoredtext(text):
            raise error.CensoredNodeError(self.indexfile, node, text)
        raise
2009
2043
2010 def _enforceinlinesize(self, tr, fp=None):
2044 def _enforceinlinesize(self, tr, fp=None):
2011 """Check if the revlog is too big for inline and convert if so.
2045 """Check if the revlog is too big for inline and convert if so.
2012
2046
2013 This should be called after revisions are added to the revlog. If the
2047 This should be called after revisions are added to the revlog. If the
2014 revlog has grown too large to be an inline revlog, it will convert it
2048 revlog has grown too large to be an inline revlog, it will convert it
2015 to use multiple index and data files.
2049 to use multiple index and data files.
2016 """
2050 """
2017 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2051 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
2018 return
2052 return
2019
2053
2020 trinfo = tr.find(self.indexfile)
2054 trinfo = tr.find(self.indexfile)
2021 if trinfo is None:
2055 if trinfo is None:
2022 raise RevlogError(_("%s not found in the transaction")
2056 raise RevlogError(_("%s not found in the transaction")
2023 % self.indexfile)
2057 % self.indexfile)
2024
2058
2025 trindex = trinfo[2]
2059 trindex = trinfo[2]
2026 if trindex is not None:
2060 if trindex is not None:
2027 dataoff = self.start(trindex)
2061 dataoff = self.start(trindex)
2028 else:
2062 else:
2029 # revlog was stripped at start of transaction, use all leftover data
2063 # revlog was stripped at start of transaction, use all leftover data
2030 trindex = len(self) - 1
2064 trindex = len(self) - 1
2031 dataoff = self.end(-2)
2065 dataoff = self.end(-2)
2032
2066
2033 tr.add(self.datafile, dataoff)
2067 tr.add(self.datafile, dataoff)
2034
2068
2035 if fp:
2069 if fp:
2036 fp.flush()
2070 fp.flush()
2037 fp.close()
2071 fp.close()
2038
2072
2039 with self._datafp('w') as df:
2073 with self._datafp('w') as df:
2040 for r in self:
2074 for r in self:
2041 df.write(self._getsegmentforrevs(r, r)[1])
2075 df.write(self._getsegmentforrevs(r, r)[1])
2042
2076
2043 with self._indexfp('w') as fp:
2077 with self._indexfp('w') as fp:
2044 self.version &= ~FLAG_INLINE_DATA
2078 self.version &= ~FLAG_INLINE_DATA
2045 self._inline = False
2079 self._inline = False
2046 io = self._io
2080 io = self._io
2047 for i in self:
2081 for i in self:
2048 e = io.packentry(self.index[i], self.node, self.version, i)
2082 e = io.packentry(self.index[i], self.node, self.version, i)
2049 fp.write(e)
2083 fp.write(e)
2050
2084
2051 # the temp file replace the real index when we exit the context
2085 # the temp file replace the real index when we exit the context
2052 # manager
2086 # manager
2053
2087
2054 tr.replace(self.indexfile, trindex * self._io.size)
2088 tr.replace(self.indexfile, trindex * self._io.size)
2055 self._chunkclear()
2089 self._chunkclear()
2056
2090
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
    """Add a revision to the log.

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional _deltacomputer instance shared between
        multiple calls
    """
    if link == nullrev:
        raise RevlogError(_("attempted to add linkrev -1 to %s")
                          % self.indexfile)

    if flags:
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # a flag processor that altered the stored text invalidates any
    # precomputed delta, which was computed against the unprocessed text
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, len(rawtext)))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        # already stored; adding again would be a no-op
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                               flags, cachedelta=cachedelta,
                               deltacomputer=deltacomputer)
2102
2136
def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                   cachedelta=None, deltacomputer=None):
    """Add a raw revision with known flags, node and parents.

    Useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).  Opens the index (and,
    for non-inline revlogs, data) file handles, delegates to
    _addrevision(), and always closes the handles afterwards.
    """
    datafh = None
    if not self._inline:
        datafh = self._datafp("a+")
    indexfh = self._indexfp("a+")
    try:
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, indexfh, datafh,
                                 deltacomputer=deltacomputer)
    finally:
        if datafh:
            datafh.close()
        indexfh.close()
2121
2155
def compress(self, data):
    """Generate a possibly-compressed representation of data.

    Returns a (header, payload) pair.  An empty header means the payload
    is self-describing: it either carries the compressor's own header or
    is literal data starting with a NUL byte.
    """
    if not data:
        return '', data

    blob = self._compressor.compress(data)
    if blob:
        # The revlog compressor added the header in the returned data.
        return '', blob

    # compressor declined (e.g. incompressible input): store literally.
    # A leading NUL already marks literal data, so no 'u' prefix needed.
    if data[0:1] == '\0':
        return '', data
    return 'u', data
2136
2170
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written, and
    # many chunks take only microseconds to decompress, so performance
    # matters here.  We assume that:
    #   1) most chunks are compressed (rather than stored literally),
    #   2) decompressing anything is at least 10x slower than returning
    #      raw inline data, and
    #   3) common, officially supported compression engines deserve
    #      priority.
    # Hence the inline header dispatch below favors "compressed with a
    # common engine" over literal data, with the generic engine lookup
    # last.  Per `hg perfrevlogchunks` this ordering is ~0.5% faster for
    # zlib chunks, which matters for changelog and manifest reads.
    header = data[0:1]

    if header == 'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise RevlogError(_('revlog decompress error: %s') %
                              stringutil.forcebytestr(e))
    # '\0' is more common than 'u' so it goes first.
    elif header == '\0':
        return data
    elif header == 'u':
        return util.buffer(data, 1)

    # uncommon engine: resolve (and memoize) a decompressor by header
    try:
        decompressor = self._decompressors[header]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(header)
            decompressor = engine.revlogcompressor()
            self._decompressors[header] = decompressor
        except KeyError:
            raise RevlogError(_('unknown compression type %r') % header)

    return decompressor.decompress(data)
2192
2226
2193 def _isgooddeltainfo(self, deltainfo, revinfo):
2227 def _isgooddeltainfo(self, deltainfo, revinfo):
2194 """Returns True if the given delta is good. Good means that it is within
2228 """Returns True if the given delta is good. Good means that it is within
2195 the disk span, disk size, and chain length bounds that we know to be
2229 the disk span, disk size, and chain length bounds that we know to be
2196 performant."""
2230 performant."""
2197 if deltainfo is None:
2231 if deltainfo is None:
2198 return False
2232 return False
2199
2233
2200 # - 'deltainfo.distance' is the distance from the base revision --
2234 # - 'deltainfo.distance' is the distance from the base revision --
2201 # bounding it limits the amount of I/O we need to do.
2235 # bounding it limits the amount of I/O we need to do.
2202 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2236 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
2203 # deltas we need to apply -- bounding it limits the amount of CPU
2237 # deltas we need to apply -- bounding it limits the amount of CPU
2204 # we consume.
2238 # we consume.
2205
2239
2206 textlen = revinfo.textlen
2240 textlen = revinfo.textlen
2207 defaultmax = textlen * 4
2241 defaultmax = textlen * 4
2208 maxdist = self._maxdeltachainspan
2242 maxdist = self._maxdeltachainspan
2209 if not maxdist:
2243 if not maxdist:
2210 maxdist = deltainfo.distance # ensure the conditional pass
2244 maxdist = deltainfo.distance # ensure the conditional pass
2211 maxdist = max(maxdist, defaultmax)
2245 maxdist = max(maxdist, defaultmax)
2212 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2246 if (deltainfo.distance > maxdist or deltainfo.deltalen > textlen or
2213 deltainfo.compresseddeltalen > textlen * 2 or
2247 deltainfo.compresseddeltalen > textlen * 2 or
2214 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2248 (self._maxchainlen and deltainfo.chainlen > self._maxchainlen)):
2215 return False
2249 return False
2216
2250
2217 return True
2251 return True
2218
2252
def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                 cachedelta, ifh, dfh, alwayscache=False,
                 deltacomputer=None):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.

    note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

    if "deltacomputer" is not provided or None, a defaultdeltacomputer will
    be used.

    invariants:
    - rawtext is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    """
    if node == nullid:
        raise RevlogError(_("%s: attempt to add null revision") %
                          (self.indexfile))
    if node == wdirid or node in wdirfilenodeids:
        raise RevlogError(_("%s: attempt to add wdir revision") %
                          (self.indexfile))

    # inline revlogs interleave data with the index
    fh = ifh if self._inline else dfh

    btext = [rawtext]

    curr = len(self)
    offset = self.end(curr - 1)
    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if rawtext is None:
        # need rawtext size, before changed by flag processors, which is
        # the non-raw size. use revlog explicitly to avoid filelog's extra
        # logic that might remove metadata size.
        textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(rawtext)

    if deltacomputer is None:
        deltacomputer = _deltacomputer(self)

    revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

    # no delta for flag processor revision (see "candelta" for why)
    # not calling candelta since only one revision needs test, also to
    # avoid overhead fetching flags again.
    if flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
        deltainfo = None
    else:
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

    if deltainfo is not None:
        base = deltainfo.base
        chainbase = deltainfo.chainbase
        data = deltainfo.data
        deltalen = deltainfo.deltalen
    else:
        # store a full snapshot instead of a delta
        rawtext = deltacomputer.buildtext(revinfo, fh)
        data = self.compress(rawtext)
        deltalen = len(data[1]) + len(data[0])
        base = chainbase = curr

    e = (offset_type(offset, flags), deltalen, textlen,
         base, link, p1r, p2r, node)
    self.index.insert(-1, e)
    self.nodemap[node] = curr

    entry = self._io.packentry(e, self.node, self.version, curr)
    self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

    if alwayscache and rawtext is None:
        rawtext = deltacomputer._buildtext(revinfo, fh)

    if type(rawtext) == bytes: # only accept immutable objects
        self._cache = (node, curr, rawtext)
    self._chainbasecache[curr] = chainbase
    return node
2304
2338
2305 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2339 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2306 # Files opened in a+ mode have inconsistent behavior on various
2340 # Files opened in a+ mode have inconsistent behavior on various
2307 # platforms. Windows requires that a file positioning call be made
2341 # platforms. Windows requires that a file positioning call be made
2308 # when the file handle transitions between reads and writes. See
2342 # when the file handle transitions between reads and writes. See
2309 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2343 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2310 # platforms, Python or the platform itself can be buggy. Some versions
2344 # platforms, Python or the platform itself can be buggy. Some versions
2311 # of Solaris have been observed to not append at the end of the file
2345 # of Solaris have been observed to not append at the end of the file
2312 # if the file was seeked to before the end. See issue4943 for more.
2346 # if the file was seeked to before the end. See issue4943 for more.
2313 #
2347 #
2314 # We work around this issue by inserting a seek() before writing.
2348 # We work around this issue by inserting a seek() before writing.
2315 # Note: This is likely not necessary on Python 3.
2349 # Note: This is likely not necessary on Python 3.
2316 ifh.seek(0, os.SEEK_END)
2350 ifh.seek(0, os.SEEK_END)
2317 if dfh:
2351 if dfh:
2318 dfh.seek(0, os.SEEK_END)
2352 dfh.seek(0, os.SEEK_END)
2319
2353
2320 curr = len(self) - 1
2354 curr = len(self) - 1
2321 if not self._inline:
2355 if not self._inline:
2322 transaction.add(self.datafile, offset)
2356 transaction.add(self.datafile, offset)
2323 transaction.add(self.indexfile, curr * len(entry))
2357 transaction.add(self.indexfile, curr * len(entry))
2324 if data[0]:
2358 if data[0]:
2325 dfh.write(data[0])
2359 dfh.write(data[0])
2326 dfh.write(data[1])
2360 dfh.write(data[1])
2327 ifh.write(entry)
2361 ifh.write(entry)
2328 else:
2362 else:
2329 offset += curr * self._io.size
2363 offset += curr * self._io.size
2330 transaction.add(self.indexfile, offset, curr)
2364 transaction.add(self.indexfile, offset, curr)
2331 ifh.write(entry)
2365 ifh.write(entry)
2332 ifh.write(data[0])
2366 ifh.write(data[0])
2333 ifh.write(data[1])
2367 ifh.write(data[1])
2334 self._enforceinlinesize(transaction, ifh)
2368 self._enforceinlinesize(transaction, ifh)
2335
2369
def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the node that was added.
    """

    nodes = []

    r = len(self)
    end = self.end(r - 1) if r else 0
    ifh = self._indexfp("a+")
    isize = r * self._io.size
    if self._inline:
        transaction.add(self.indexfile, end + isize, r)
        dfh = None
    else:
        transaction.add(self.indexfile, isize, r)
        transaction.add(self.datafile, end)
        dfh = self._datafp("a+")

    def flush():
        # flush pending writes so _peek_iscensored can read through them
        if dfh:
            dfh.flush()
        ifh.flush()

    try:
        deltacomputer = _deltacomputer(self)
        # loop through our set of deltas
        for chunk in deltas:
            node, p1, p2, linknode, deltabase, delta, flags = chunk
            link = linkmapper(linknode)
            flags = flags or REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self.nodemap:
                # this can happen if two branches make the same change
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(p, self.indexfile,
                                      _('unknown parent'))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            self._addrevision(node, None, transaction, link,
                              p1, p2, flags, (baserev, delta),
                              ifh, dfh,
                              alwayscache=bool(addrevisioncb),
                              deltacomputer=deltacomputer)

            if addrevisioncb:
                addrevisioncb(self, node)

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self._datafp("a+")
                ifh = self._indexfp("a+")
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return nodes
2433
2467
def iscensored(self, rev):
    """Check if a file revision is censored."""
    if not self._censorable:
        # censorship does not apply to this kind of revlog
        return False
    return self.flags(rev) & REVIDX_ISCENSORED
2440
2474
2441 def _peek_iscensored(self, baserev, delta, flush):
2475 def _peek_iscensored(self, baserev, delta, flush):
2442 """Quickly check if a delta produces a censored revision."""
2476 """Quickly check if a delta produces a censored revision."""
2443 if not self._censorable:
2477 if not self._censorable:
2444 return False
2478 return False
2445
2479
2446 # Fragile heuristic: unless new file meta keys are added alphabetically
2480 # Fragile heuristic: unless new file meta keys are added alphabetically
2447 # preceding "censored", all censored revisions are prefixed by
2481 # preceding "censored", all censored revisions are prefixed by
2448 # "\1\ncensored:". A delta producing such a censored revision must be a
2482 # "\1\ncensored:". A delta producing such a censored revision must be a
2449 # full-replacement delta, so we inspect the first and only patch in the
2483 # full-replacement delta, so we inspect the first and only patch in the
2450 # delta for this prefix.
2484 # delta for this prefix.
2451 hlen = struct.calcsize(">lll")
2485 hlen = struct.calcsize(">lll")
2452 if len(delta) <= hlen:
2486 if len(delta) <= hlen:
2453 return False
2487 return False
2454
2488
2455 oldlen = self.rawsize(baserev)
2489 oldlen = self.rawsize(baserev)
2456 newlen = len(delta) - hlen
2490 newlen = len(delta) - hlen
2457 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2491 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2458 return False
2492 return False
2459
2493
2460 add = "\1\ncensored:"
2494 add = "\1\ncensored:"
2461 addlen = len(add)
2495 addlen = len(add)
2462 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2496 return newlen >= addlen and delta[hlen:hlen + addlen] == add
2463
2497
def getstrippoint(self, minlink):
    """find the minimum rev that must be stripped to strip the linkrev

    Returns a tuple containing the minimum rev and a set of all revs that
    have linkrevs that will be broken by this strip.
    """
    brokenrevs = set()
    strippoint = len(self)

    heads = {}
    pendinglinkrevs = set()
    for head in self.headrevs():
        headlinkrev = self.linkrev(head)
        heads[head] = headlinkrev
        if headlinkrev >= minlink:
            pendinglinkrevs.add(headlinkrev)

    # This algorithm involves walking down the rev graph, starting at the
    # heads. Since the revs are topologically sorted according to linkrev,
    # once all head linkrevs are below the minlink, we know there are
    # no more revs that could have a linkrev greater than minlink.
    # So we can stop walking.
    while pendinglinkrevs:
        strippoint -= 1
        linkrev = heads.pop(strippoint)

        if linkrev < minlink:
            brokenrevs.add(strippoint)
        else:
            pendinglinkrevs.remove(linkrev)

        # the parents become the new frontier of the walk
        for p in self.parentrevs(strippoint):
            if p != nullrev:
                plinkrev = self.linkrev(p)
                heads[p] = plinkrev
                if plinkrev >= minlink:
                    pendinglinkrevs.add(plinkrev)

    return strippoint, brokenrevs
2503
2537
def strip(self, minlink, transaction):
    """truncate the revlog on the first revision with a linkrev >= minlink

    This function is called when we're stripping revision minlink and
    its descendants from the repository.

    We have to remove all revisions with linkrev >= minlink, because
    the equivalent changelog revisions will be renumbered after the
    strip.

    So we truncate the revlog on the first of these revisions, and
    trust that the caller has saved the revisions that shouldn't be
    removed and that it'll re-add them after this truncation.
    """
    if len(self) == 0:
        return

    # NOTE(review): '_' here locally shadows the i18n gettext alias for
    # the rest of this method; only the strip point itself is used.
    rev, _ = self.getstrippoint(minlink)
    if rev == len(self):
        # nothing to strip: every remaining rev has linkrev < minlink
        return

    # first truncate the files on disk
    end = self.start(rev)
    if not self._inline:
        transaction.add(self.datafile, end)
        # separate index file: keep exactly 'rev' fixed-size entries
        end = rev * self._io.size
    else:
        # inline revlog: index entries and data chunks share one file, so
        # the index offset is the surviving data size plus 'rev' entries
        end += rev * self._io.size

    transaction.add(self.indexfile, end)

    # then reset internal state in memory to forget those revisions
    self._cache = None
    self._chaininfocache = {}
    self._chunkclear()
    for x in xrange(rev, len(self)):
        del self.nodemap[self.node(x)]

    # the -1 bound presumably preserves a trailing sentinel entry in the
    # index — TODO confirm against the index implementation
    del self.index[rev:-1]
    self._nodepos = None
2544
2578
def checksize(self):
    """Check on-disk file sizes against what the index implies.

    Returns a ``(dd, di)`` pair: ``dd`` is the number of surplus bytes in
    the data file, ``di`` the number of surplus bytes in the index file.
    Both are zero for an internally consistent revlog; a missing file is
    treated as empty rather than an error.
    """
    # Where the data for the last revision should end, per the index.
    expected = max(0, self.end(len(self) - 1)) if len(self) else 0

    # Compare the data file's real size with the expected end offset.
    try:
        with self._datafp() as datafh:
            datafh.seek(0, 2)
            dd = datafh.tell() - expected
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        dd = 0  # no data file: treat as empty

    # Compare the index file's real size with a whole number of entries.
    try:
        indexfh = self.opener(self.indexfile)
        indexfh.seek(0, 2)
        indexsize = indexfh.tell()
        indexfh.close()
        entrysize = self._io.size
        wholeentries = max(0, indexsize // entrysize)
        di = indexsize - wholeentries * entrysize
        if self._inline:
            # Inline revlogs interleave data chunks with index entries in
            # a single file, so account for the chunk payloads too.
            databytes = sum(max(0, self.length(r)) for r in self)
            dd = 0
            di = indexsize - len(self) * entrysize - databytes
    except IOError as err:
        if err.errno != errno.ENOENT:
            raise
        di = 0  # no index file: treat as empty

    return (dd, di)
2580
2614
def files(self):
    """Return the names of the files backing this revlog.

    Always includes the index file; the data file is included only when
    the revlog is not inline (inline revlogs keep data inside the index
    file).
    """
    if self._inline:
        return [self.indexfile]
    return [self.indexfile, self.datafile]
2586
2620
# Delta reuse policies accepted by clone()'s 'deltareuse' argument; see the
# clone() docstring for the exact semantics of each mode.
DELTAREUSEALWAYS = 'always'
DELTAREUSESAMEREVS = 'samerevs'
DELTAREUSENEVER = 'never'

# Re-add each revision through the full addrevision() path instead of the
# lower-level _addrevision() storage path (see the branch in clone()).
DELTAREUSEFULLADD = 'fulladd'

# All valid policy values; clone() validates its argument against this set.
DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2594
2628
def clone(self, tr, destrevlog, addrevisioncb=None,
          deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
    """Copy this revlog to another, possibly with format changes.

    The destination revlog will contain the same revisions and nodes.
    However, it may not be bit-for-bit identical due to e.g. delta encoding
    differences.

    The ``deltareuse`` argument control how deltas from the existing revlog
    are preserved in the destination revlog. The argument can have the
    following values:

    DELTAREUSEALWAYS
      Deltas will always be reused (if possible), even if the destination
      revlog would not select the same revisions for the delta. This is the
      fastest mode of operation.
    DELTAREUSESAMEREVS
      Deltas will be reused if the destination revlog would pick the same
      revisions for the delta. This mode strikes a balance between speed
      and optimization.
    DELTAREUSENEVER
      Deltas will never be reused. This is the slowest mode of execution.
      This mode can be used to recompute deltas (e.g. if the diff/delta
      algorithm changes).

    Delta computation can be slow, so the choice of delta reuse policy can
    significantly affect run time.

    The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
    two extremes. Deltas will be reused if they are appropriate. But if the
    delta could choose a better revision, it will do so. This means if you
    are converting a non-generaldelta revlog to a generaldelta revlog,
    deltas will be recomputed if the delta's parent isn't a parent of the
    revision.

    In addition to the delta policy, the ``aggressivemergedeltas`` argument
    controls whether to compute deltas against both parents for merges.
    By default, the current default is used.

    Raises ValueError for an unknown ``deltareuse`` value, a non-empty
    destination, or when either revlog has filtered revisions.
    """
    if deltareuse not in self.DELTAREUSEALL:
        raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

    if len(destrevlog):
        raise ValueError(_('destination revlog is not empty'))

    if getattr(self, 'filteredrevs', None):
        raise ValueError(_('source revlog has filtered revisions'))
    if getattr(destrevlog, 'filteredrevs', None):
        raise ValueError(_('destination revlog has filtered revisions'))

    # lazydeltabase controls whether to reuse a cached delta, if possible.
    # Save the destination's settings so they can be restored in the
    # finally block below, even if the copy fails partway.
    oldlazydeltabase = destrevlog._lazydeltabase
    oldamd = destrevlog._aggressivemergedeltas

    try:
        if deltareuse == self.DELTAREUSEALWAYS:
            destrevlog._lazydeltabase = True
        elif deltareuse == self.DELTAREUSESAMEREVS:
            destrevlog._lazydeltabase = False

        destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

        populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                            self.DELTAREUSESAMEREVS)

        deltacomputer = _deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            # entry[5]/entry[6] are parent revs; field 7 is the node hash.
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if populatecachedelta:
                dp = self.deltaparent(rev)
                if dp != nullrev:
                    cachedelta = (dp, bytes(self._chunk(rev)))

            if not cachedelta:
                # No reusable delta: fall back to the full raw text.
                rawtext = self.revision(rev, raw=True)


            if deltareuse == self.DELTAREUSEFULLADD:
                destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)
    finally:
        # Restore the destination revlog's original delta settings.
        destrevlog._lazydeltabase = oldlazydeltabase
        destrevlog._aggressivemergedeltas = oldamd
General Comments 0
You need to be logged in to leave comments. Login now