upstream/mercurial-mirror Commit - r40091:0a4625ff

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""Storage back-end for Mercurial.

8

"""Storage back-end for Mercurial.

9

10

This provides efficient delta storage with O(1) retrieve and append

10

This provides efficient delta storage with O(1) retrieve and append

11

and O(changes) merge between branches.

11

and O(changes) merge between branches.

12

"""

12

"""

13

14

from __future__ import absolute_import

14

from __future__ import absolute_import

15

16

import collections

16

import collections

17

import contextlib

17

import contextlib

18

import errno

18

import errno

19

import os

19

import os

20

import struct

20

import struct

21

import zlib

21

import zlib

22

23

# import stuff from node for others to import from revlog

23

# import stuff from node for others to import from revlog

24

from .node import (

24

from .node import (

25

bin,

25

bin,

26

hex,

26

hex,

27

nullhex,

27

nullhex,

28

nullid,

28

nullid,

29

nullrev,

29

nullrev,

30

short,

30

short,

31

wdirfilenodeids,

31

wdirfilenodeids,

32

wdirhex,

32

wdirhex,

33

wdirid,

33

wdirid,

34

wdirrev,

34

wdirrev,

35

)

35

)

36

from .i18n import _

36

from .i18n import _

37

from .revlogutils.constants import (

37

from .revlogutils.constants import (

38

FLAG_GENERALDELTA,

38

FLAG_GENERALDELTA,

39

FLAG_INLINE_DATA,

39

FLAG_INLINE_DATA,

40

REVIDX_DEFAULT_FLAGS,

40

REVIDX_DEFAULT_FLAGS,

41

REVIDX_ELLIPSIS,

41

REVIDX_ELLIPSIS,

42

REVIDX_EXTSTORED,

42

REVIDX_EXTSTORED,

43

REVIDX_FLAGS_ORDER,

43

REVIDX_FLAGS_ORDER,

44

REVIDX_ISCENSORED,

44

REVIDX_ISCENSORED,

45

REVIDX_KNOWN_FLAGS,

45

REVIDX_KNOWN_FLAGS,

46

REVIDX_RAWTEXT_CHANGING_FLAGS,

46

REVIDX_RAWTEXT_CHANGING_FLAGS,

47

REVLOGV0,

47

REVLOGV0,

48

REVLOGV1,

48

REVLOGV1,

49

REVLOGV1_FLAGS,

49

REVLOGV1_FLAGS,

50

REVLOGV2,

50

REVLOGV2,

51

REVLOGV2_FLAGS,

51

REVLOGV2_FLAGS,

52

REVLOG_DEFAULT_FLAGS,

52

REVLOG_DEFAULT_FLAGS,

53

REVLOG_DEFAULT_FORMAT,

53

REVLOG_DEFAULT_FORMAT,

54

REVLOG_DEFAULT_VERSION,

54

REVLOG_DEFAULT_VERSION,

55

)

55

)

56

from .thirdparty import (

56

from .thirdparty import (

57

attr,

57

attr,

58

)

58

)

59

from . import (

59

from . import (

60

ancestor,

60

ancestor,

61

dagop,

61

dagop,

62

error,

62

error,

63

mdiff,

63

mdiff,

64

policy,

64

policy,

65

pycompat,

65

pycompat,

66

repository,

66

repository,

67

templatefilters,

67

templatefilters,

68

util,

68

util,

69

)

69

)

70

from .revlogutils import (

70

from .revlogutils import (

71

deltas as deltautil,

71

deltas as deltautil,

72

)

72

)

73

from .utils import (

73

from .utils import (

74

interfaceutil,

74

interfaceutil,

75

storageutil,

75

storageutil,

76

stringutil,

76

stringutil,

77

)

77

)

78

79

# blanked usage of all the name to prevent pyflakes constraints

79

# blanked usage of all the name to prevent pyflakes constraints

80

# We need these name available in the module for extensions.

80

# We need these name available in the module for extensions.

81

REVLOGV0

81

REVLOGV0

82

REVLOGV1

82

REVLOGV1

83

REVLOGV2

83

REVLOGV2

84

FLAG_INLINE_DATA

84

FLAG_INLINE_DATA

85

FLAG_GENERALDELTA

85

FLAG_GENERALDELTA

86

REVLOG_DEFAULT_FLAGS

86

REVLOG_DEFAULT_FLAGS

87

REVLOG_DEFAULT_FORMAT

87

REVLOG_DEFAULT_FORMAT

88

REVLOG_DEFAULT_VERSION

88

REVLOG_DEFAULT_VERSION

89

REVLOGV1_FLAGS

89

REVLOGV1_FLAGS

90

REVLOGV2_FLAGS

90

REVLOGV2_FLAGS

91

REVIDX_ISCENSORED

91

REVIDX_ISCENSORED

92

REVIDX_ELLIPSIS

92

REVIDX_ELLIPSIS

93

REVIDX_EXTSTORED

93

REVIDX_EXTSTORED

94

REVIDX_DEFAULT_FLAGS

94

REVIDX_DEFAULT_FLAGS

95

REVIDX_FLAGS_ORDER

95

REVIDX_FLAGS_ORDER

96

REVIDX_KNOWN_FLAGS

96

REVIDX_KNOWN_FLAGS

97

REVIDX_RAWTEXT_CHANGING_FLAGS

97

REVIDX_RAWTEXT_CHANGING_FLAGS

98

99

parsers = policy.importmod(r'parsers')

99

parsers = policy.importmod(r'parsers')

100

101

# Aliased for performance.

101

# Aliased for performance.

102

_zlibdecompress = zlib.decompress

102

_zlibdecompress = zlib.decompress

103

104

# max size of revlog with inline data

104

# max size of revlog with inline data

105

_maxinline = 131072

105

_maxinline = 131072

106

_chunksize = 1048576

106

_chunksize = 1048576

107

108

# Store flag processors (cf. 'addflagprocessor()' to register)

108

# Store flag processors (cf. 'addflagprocessor()' to register)

109

_flagprocessors = {

109

_flagprocessors = {

110

REVIDX_ISCENSORED: None,

110

REVIDX_ISCENSORED: None,

111

}

111

}

112

113

# Flag processors for REVIDX_ELLIPSIS.

113

# Flag processors for REVIDX_ELLIPSIS.

114

def ellipsisreadprocessor(rl, text):

114

def ellipsisreadprocessor(rl, text):

115

return text, False

115

return text, False

116

117

def ellipsiswriteprocessor(rl, text):

117

def ellipsiswriteprocessor(rl, text):

118

return text, False

118

return text, False

119

120

def ellipsisrawprocessor(rl, text):

120

def ellipsisrawprocessor(rl, text):

121

return False

121

return False

122

123

ellipsisprocessor = (

123

ellipsisprocessor = (

124

ellipsisreadprocessor,

124

ellipsisreadprocessor,

125

ellipsiswriteprocessor,

125

ellipsiswriteprocessor,

126

ellipsisrawprocessor,

126

ellipsisrawprocessor,

127

)

127

)

128

129

def addflagprocessor(flag, processor):

129

def addflagprocessor(flag, processor):

130

"""Register a flag processor on a revision data flag.

130

"""Register a flag processor on a revision data flag.

131

132

Invariant:

132

Invariant:

133

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,

133

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,

134

and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.

134

and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.

135

- Only one flag processor can be registered on a specific flag.

135

- Only one flag processor can be registered on a specific flag.

136

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

136

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

137

following signatures:

137

following signatures:

138

- (read) f(self, rawtext) -> text, bool

138

- (read) f(self, rawtext) -> text, bool

139

- (write) f(self, text) -> rawtext, bool

139

- (write) f(self, text) -> rawtext, bool

140

- (raw) f(self, rawtext) -> bool

140

- (raw) f(self, rawtext) -> bool

141

"text" is presented to the user. "rawtext" is stored in revlog data, not

141

"text" is presented to the user. "rawtext" is stored in revlog data, not

142

directly visible to the user.

142

directly visible to the user.

143

The boolean returned by these transforms is used to determine whether

143

The boolean returned by these transforms is used to determine whether

144

the returned text can be used for hash integrity checking. For example,

144

the returned text can be used for hash integrity checking. For example,

145

if "write" returns False, then "text" is used to generate hash. If

145

if "write" returns False, then "text" is used to generate hash. If

146

"write" returns True, that basically means "rawtext" returned by "write"

146

"write" returns True, that basically means "rawtext" returned by "write"

147

should be used to generate hash. Usually, "write" and "read" return

147

should be used to generate hash. Usually, "write" and "read" return

148

different booleans. And "raw" returns a same boolean as "write".

148

different booleans. And "raw" returns a same boolean as "write".

149

150

Note: The 'raw' transform is used for changegroup generation and in some

150

Note: The 'raw' transform is used for changegroup generation and in some

151

debug commands. In this case the transform only indicates whether the

151

debug commands. In this case the transform only indicates whether the

152

contents can be used for hash integrity checks.

152

contents can be used for hash integrity checks.

153

"""

153

"""

154

if not flag & REVIDX_KNOWN_FLAGS:

154

if not flag & REVIDX_KNOWN_FLAGS:

155

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

155

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

156

raise error.ProgrammingError(msg)

156

raise error.ProgrammingError(msg)

157

if flag not in REVIDX_FLAGS_ORDER:

157

if flag not in REVIDX_FLAGS_ORDER:

158

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

158

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

159

raise error.ProgrammingError(msg)

159

raise error.ProgrammingError(msg)

160

if flag in _flagprocessors:

160

if flag in _flagprocessors:

161

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

161

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

162

raise error.Abort(msg)

162

raise error.Abort(msg)

163

_flagprocessors[flag] = processor

163

_flagprocessors[flag] = processor

164

165

def getoffset(q):

165

def getoffset(q):

166

return int(q >> 16)

166

return int(q >> 16)

167

168

def gettype(q):

168

def gettype(q):

169

return int(q & 0xFFFF)

169

return int(q & 0xFFFF)

170

171

def offset_type(offset, type):

171

def offset_type(offset, type):

172

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

172

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

173

raise ValueError('unknown revlog index flags')

173

raise ValueError('unknown revlog index flags')

174

return int(int(offset) << 16 | type)

174

return int(int(offset) << 16 | type)

175

176

@attr.s(slots=True, frozen=True)

176

@attr.s(slots=True, frozen=True)

177

class _revisioninfo(object):

177

class _revisioninfo(object):

178

"""Information about a revision that allows building its fulltext

178

"""Information about a revision that allows building its fulltext

179

node: expected hash of the revision

179

node: expected hash of the revision

180

p1, p2: parent revs of the revision

180

p1, p2: parent revs of the revision

181

btext: built text cache consisting of a one-element list

181

btext: built text cache consisting of a one-element list

182

cachedelta: (baserev, uncompressed_delta) or None

182

cachedelta: (baserev, uncompressed_delta) or None

183

flags: flags associated to the revision storage

183

flags: flags associated to the revision storage

184

185

One of btext[0] or cachedelta must be set.

185

One of btext[0] or cachedelta must be set.

186

"""

186

"""

187

node = attr.ib()

187

node = attr.ib()

188

p1 = attr.ib()

188

p1 = attr.ib()

189

p2 = attr.ib()

189

p2 = attr.ib()

190

btext = attr.ib()

190

btext = attr.ib()

191

textlen = attr.ib()

191

textlen = attr.ib()

192

cachedelta = attr.ib()

192

cachedelta = attr.ib()

193

flags = attr.ib()

193

flags = attr.ib()

194

195

@interfaceutil.implementer(repository.irevisiondelta)

195

@interfaceutil.implementer(repository.irevisiondelta)

196

@attr.s(slots=True)

196

@attr.s(slots=True)

197

class revlogrevisiondelta(object):

197

class revlogrevisiondelta(object):

198

node = attr.ib()

198

node = attr.ib()

199

p1node = attr.ib()

199

p1node = attr.ib()

200

p2node = attr.ib()

200

p2node = attr.ib()

201

basenode = attr.ib()

201

basenode = attr.ib()

202

flags = attr.ib()

202

flags = attr.ib()

203

baserevisionsize = attr.ib()

203

baserevisionsize = attr.ib()

204

revision = attr.ib()

204

revision = attr.ib()

205

delta = attr.ib()

205

delta = attr.ib()

206

linknode = attr.ib(default=None)

206

linknode = attr.ib(default=None)

207

208

@interfaceutil.implementer(repository.iverifyproblem)

208

@interfaceutil.implementer(repository.iverifyproblem)

209

@attr.s(frozen=True)

209

@attr.s(frozen=True)

210

class revlogproblem(object):

210

class revlogproblem(object):

211

warning = attr.ib(default=None)

211

warning = attr.ib(default=None)

212

error = attr.ib(default=None)

212

error = attr.ib(default=None)

213

node = attr.ib(default=None)

213

node = attr.ib(default=None)

214

215

# index v0:

215

# index v0:

216

# 4 bytes: offset

216

# 4 bytes: offset

217

# 4 bytes: compressed length

217

# 4 bytes: compressed length

218

# 4 bytes: base rev

218

# 4 bytes: base rev

219

# 4 bytes: link rev

219

# 4 bytes: link rev

220

# 20 bytes: parent 1 nodeid

220

# 20 bytes: parent 1 nodeid

221

# 20 bytes: parent 2 nodeid

221

# 20 bytes: parent 2 nodeid

222

# 20 bytes: nodeid

222

# 20 bytes: nodeid

223

indexformatv0 = struct.Struct(">4l20s20s20s")

223

indexformatv0 = struct.Struct(">4l20s20s20s")

224

indexformatv0_pack = indexformatv0.pack

224

indexformatv0_pack = indexformatv0.pack

225

indexformatv0_unpack = indexformatv0.unpack

225

indexformatv0_unpack = indexformatv0.unpack

226

227

class revlogoldindex(list):

227

class revlogoldindex(list):

228

def __getitem__(self, i):

228

def __getitem__(self, i):

229

if i == -1:

229

if i == -1:

230

return (0, 0, 0, -1, -1, -1, -1, nullid)

230

return (0, 0, 0, -1, -1, -1, -1, nullid)

231

return list.__getitem__(self, i)

231

return list.__getitem__(self, i)

232

233

class revlogoldio(object):

233

class revlogoldio(object):

234

def __init__(self):

234

def __init__(self):

235

self.size = indexformatv0.size

235

self.size = indexformatv0.size

236

237

def parseindex(self, data, inline):

237

def parseindex(self, data, inline):

238

s = self.size

238

s = self.size

239

index = []

239

index = []

240

nodemap = {nullid: nullrev}

240

nodemap = {nullid: nullrev}

241

n = off = 0

241

n = off = 0

242

l = len(data)

242

l = len(data)

243

while off + s <= l:

243

while off + s <= l:

244

cur = data[off:off + s]

244

cur = data[off:off + s]

245

off += s

245

off += s

246

e = indexformatv0_unpack(cur)

246

e = indexformatv0_unpack(cur)

247

# transform to revlogv1 format

247

# transform to revlogv1 format

248

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

248

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

249

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

249

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

250

index.append(e2)

250

index.append(e2)

251

nodemap[e[6]] = n

251

nodemap[e[6]] = n

252

n += 1

252

n += 1

253

254

return revlogoldindex(index), nodemap, None

254

return revlogoldindex(index), nodemap, None

255

256

def packentry(self, entry, node, version, rev):

256

def packentry(self, entry, node, version, rev):

257

if gettype(entry[0]):

257

if gettype(entry[0]):

258

raise error.RevlogError(_('index entry flags need revlog '

258

raise error.RevlogError(_('index entry flags need revlog '

259

'version 1'))

259

'version 1'))

260

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

260

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

261

node(entry[5]), node(entry[6]), entry[7])

261

node(entry[5]), node(entry[6]), entry[7])

262

return indexformatv0_pack(*e2)

262

return indexformatv0_pack(*e2)

263

264

# index ng:

264

# index ng:

265

# 6 bytes: offset

265

# 6 bytes: offset

266

# 2 bytes: flags

266

# 2 bytes: flags

267

# 4 bytes: compressed length

267

# 4 bytes: compressed length

268

# 4 bytes: uncompressed length

268

# 4 bytes: uncompressed length

269

# 4 bytes: base rev

269

# 4 bytes: base rev

270

# 4 bytes: link rev

270

# 4 bytes: link rev

271

# 4 bytes: parent 1 rev

271

# 4 bytes: parent 1 rev

272

# 4 bytes: parent 2 rev

272

# 4 bytes: parent 2 rev

273

# 32 bytes: nodeid

273

# 32 bytes: nodeid

274

indexformatng = struct.Struct(">Qiiiiii20s12x")

274

indexformatng = struct.Struct(">Qiiiiii20s12x")

275

indexformatng_pack = indexformatng.pack

275

indexformatng_pack = indexformatng.pack

276

versionformat = struct.Struct(">I")

276

versionformat = struct.Struct(">I")

277

versionformat_pack = versionformat.pack

277

versionformat_pack = versionformat.pack

278

versionformat_unpack = versionformat.unpack

278

versionformat_unpack = versionformat.unpack

279

280

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

280

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

281

# signed integer)

281

# signed integer)

282

_maxentrysize = 0x7fffffff

282

_maxentrysize = 0x7fffffff

283

284

class revlogio(object):

284

class revlogio(object):

285

def __init__(self):

285

def __init__(self):

286

self.size = indexformatng.size

286

self.size = indexformatng.size

287

288

def parseindex(self, data, inline):

288

def parseindex(self, data, inline):

289

# call the C implementation to parse the index data

289

# call the C implementation to parse the index data

290

index, cache = parsers.parse_index2(data, inline)

290

index, cache = parsers.parse_index2(data, inline)

291

return index, getattr(index, 'nodemap', None), cache

291

return index, getattr(index, 'nodemap', None), cache

292

293

def packentry(self, entry, node, version, rev):

293

def packentry(self, entry, node, version, rev):

294

p = indexformatng_pack(*entry)

294

p = indexformatng_pack(*entry)

295

if rev == 0:

295

if rev == 0:

296

p = versionformat_pack(version) + p[4:]

296

p = versionformat_pack(version) + p[4:]

297

return p

297

return p

298

299

class revlog(object):

299

class revlog(object):

300

"""

300

"""

301

the underlying revision storage object

301

the underlying revision storage object

302

303

A revlog consists of two parts, an index and the revision data.

303

A revlog consists of two parts, an index and the revision data.

304

305

The index is a file with a fixed record size containing

305

The index is a file with a fixed record size containing

306

information on each revision, including its nodeid (hash), the

306

information on each revision, including its nodeid (hash), the

307

nodeids of its parents, the position and offset of its data within

307

nodeids of its parents, the position and offset of its data within

308

the data file, and the revision it's based on. Finally, each entry

308

the data file, and the revision it's based on. Finally, each entry

309

contains a linkrev entry that can serve as a pointer to external

309

contains a linkrev entry that can serve as a pointer to external

310

data.

310

data.

311

312

The revision data itself is a linear collection of data chunks.

312

The revision data itself is a linear collection of data chunks.

313

Each chunk represents a revision and is usually represented as a

313

Each chunk represents a revision and is usually represented as a

314

delta against the previous chunk. To bound lookup time, runs of

314

delta against the previous chunk. To bound lookup time, runs of

315

deltas are limited to about 2 times the length of the original

315

deltas are limited to about 2 times the length of the original

316

version data. This makes retrieval of a version proportional to

316

version data. This makes retrieval of a version proportional to

317

its size, or O(1) relative to the number of revisions.

317

its size, or O(1) relative to the number of revisions.

318

319

Both pieces of the revlog are written to in an append-only

319

Both pieces of the revlog are written to in an append-only

320

fashion, which means we never need to rewrite a file to insert or

320

fashion, which means we never need to rewrite a file to insert or

321

remove data, and can use some simple techniques to avoid the need

321

remove data, and can use some simple techniques to avoid the need

322

for locking while reading.

322

for locking while reading.

323

324

If checkambig, indexfile is opened with checkambig=True at

324

If checkambig, indexfile is opened with checkambig=True at

325

writing, to avoid file stat ambiguity.

325

writing, to avoid file stat ambiguity.

326

327

If mmaplargeindex is True, and an mmapindexthreshold is set, the

327

If mmaplargeindex is True, and an mmapindexthreshold is set, the

328

index will be mmapped rather than read if it is larger than the

328

index will be mmapped rather than read if it is larger than the

329

configured threshold.

329

configured threshold.

330

331

If censorable is True, the revlog can have censored revisions.

331

If censorable is True, the revlog can have censored revisions.

332

"""

332

"""

333

def __init__(self, opener, indexfile, datafile=None, checkambig=False,

333

def __init__(self, opener, indexfile, datafile=None, checkambig=False,

334

mmaplargeindex=False, censorable=False):

334

mmaplargeindex=False, censorable=False):

335

"""

335

"""

336

create a revlog object

336

create a revlog object

337

338

opener is a function that abstracts the file opening operation

338

opener is a function that abstracts the file opening operation

339

and can be used to implement COW semantics or the like.

339

and can be used to implement COW semantics or the like.

340

"""

340

"""

341

self.indexfile = indexfile

341

self.indexfile = indexfile

342

self.datafile = datafile or (indexfile[:-2] + ".d")

342

self.datafile = datafile or (indexfile[:-2] + ".d")

343

self.opener = opener

343

self.opener = opener

344

# When True, indexfile is opened with checkambig=True at writing, to

344

# When True, indexfile is opened with checkambig=True at writing, to

345

# avoid file stat ambiguity.

345

# avoid file stat ambiguity.

346

self._checkambig = checkambig

346

self._checkambig = checkambig

347

self._censorable = censorable

347

self._censorable = censorable

348

# 3-tuple of (node, rev, text) for a raw revision.

348

# 3-tuple of (node, rev, text) for a raw revision.

349

self._revisioncache = None

349

self._revisioncache = None

350

# Maps rev to chain base rev.

350

# Maps rev to chain base rev.

351

self._chainbasecache = util.lrucachedict(100)

351

self._chainbasecache = util.lrucachedict(100)

352

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

352

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

353

self._chunkcache = (0, '')

353

self._chunkcache = (0, '')

354

# How much data to read and cache into the raw revlog data cache.

354

# How much data to read and cache into the raw revlog data cache.

355

self._chunkcachesize = 65536

355

self._chunkcachesize = 65536

356

self._maxchainlen = None

356

self._maxchainlen = None

357

self._deltabothparents = True

357

self._deltabothparents = True

358

self.index = []

358

self.index = []

359

# Mapping of partial identifiers to full nodes.

359

# Mapping of partial identifiers to full nodes.

360

self._pcache = {}

360

self._pcache = {}

361

# Mapping of revision integer to full node.

361

# Mapping of revision integer to full node.

362

self._nodecache = {nullid: nullrev}

362

self._nodecache = {nullid: nullrev}

363

self._nodepos = None

363

self._nodepos = None

364

self._compengine = 'zlib'

364

self._compengine = 'zlib'

365

self._maxdeltachainspan = -1

365

self._maxdeltachainspan = -1

366

self._withsparseread = False

366

self._withsparseread = False

367

self._sparserevlog = False

367

self._sparserevlog = False

368

self._srdensitythreshold = 0.50

368

self._srdensitythreshold = 0.50

369

self._srmingapsize = 262144

369

self._srmingapsize = 262144

370

371

# Make copy of flag processors so each revlog instance can support

371

# Make copy of flag processors so each revlog instance can support

372

# custom flags.

372

# custom flags.

373

self._flagprocessors = dict(_flagprocessors)

373

self._flagprocessors = dict(_flagprocessors)

374

375

mmapindexthreshold = None

375

mmapindexthreshold = None

376

v = REVLOG_DEFAULT_VERSION

376

v = REVLOG_DEFAULT_VERSION

377

opts = getattr(opener, 'options', None)

377

opts = getattr(opener, 'options', None)

378

if opts is not None:

378

if opts is not None:

379

if 'revlogv2' in opts:

379

if 'revlogv2' in opts:

380

# version 2 revlogs always use generaldelta.

380

# version 2 revlogs always use generaldelta.

381

v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA

381

v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA

382

elif 'revlogv1' in opts:

382

elif 'revlogv1' in opts:

383

if 'generaldelta' in opts:

383

if 'generaldelta' in opts:

384

v |= FLAG_GENERALDELTA

384

v |= FLAG_GENERALDELTA

385

else:

385

else:

386

v = 0

386

v = 0

387

if 'chunkcachesize' in opts:

387

if 'chunkcachesize' in opts:

388

self._chunkcachesize = opts['chunkcachesize']

388

self._chunkcachesize = opts['chunkcachesize']

389

if 'maxchainlen' in opts:

389

if 'maxchainlen' in opts:

390

self._maxchainlen = opts['maxchainlen']

390

self._maxchainlen = opts['maxchainlen']

391

if 'deltabothparents' in opts:

391

if 'deltabothparents' in opts:

392

self._deltabothparents = opts['deltabothparents']

392

self._deltabothparents = opts['deltabothparents']

393

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

393

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

394

if 'compengine' in opts:

394

if 'compengine' in opts:

395

self._compengine = opts['compengine']

395

self._compengine = opts['compengine']

396

if 'maxdeltachainspan' in opts:

396

if 'maxdeltachainspan' in opts:

397

self._maxdeltachainspan = opts['maxdeltachainspan']

397

self._maxdeltachainspan = opts['maxdeltachainspan']

398

if mmaplargeindex and 'mmapindexthreshold' in opts:

398

if mmaplargeindex and 'mmapindexthreshold' in opts:

399

mmapindexthreshold = opts['mmapindexthreshold']

399

mmapindexthreshold = opts['mmapindexthreshold']

400

self._sparserevlog = bool(opts.get('sparse-revlog', False))

400

self._sparserevlog = bool(opts.get('sparse-revlog', False))

401

withsparseread = bool(opts.get('with-sparse-read', False))

401

withsparseread = bool(opts.get('with-sparse-read', False))

402

# sparse-revlog forces sparse-read

402

# sparse-revlog forces sparse-read

403

self._withsparseread = self._sparserevlog or withsparseread

403

self._withsparseread = self._sparserevlog or withsparseread

404

if 'sparse-read-density-threshold' in opts:

404

if 'sparse-read-density-threshold' in opts:

405

self._srdensitythreshold = opts['sparse-read-density-threshold']

405

self._srdensitythreshold = opts['sparse-read-density-threshold']

406

if 'sparse-read-min-gap-size' in opts:

406

if 'sparse-read-min-gap-size' in opts:

407

self._srmingapsize = opts['sparse-read-min-gap-size']

407

self._srmingapsize = opts['sparse-read-min-gap-size']

408

if opts.get('enableellipsis'):

408

if opts.get('enableellipsis'):

409

self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

409

self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

410

411

if self._chunkcachesize <= 0:

411

if self._chunkcachesize <= 0:

412

raise error.RevlogError(_('revlog chunk cache size %r is not '

412

raise error.RevlogError(_('revlog chunk cache size %r is not '

413

'greater than 0') % self._chunkcachesize)

413

'greater than 0') % self._chunkcachesize)

414

elif self._chunkcachesize & (self._chunkcachesize - 1):

414

elif self._chunkcachesize & (self._chunkcachesize - 1):

415

raise error.RevlogError(_('revlog chunk cache size %r is not a '

415

raise error.RevlogError(_('revlog chunk cache size %r is not a '

416

'power of 2') % self._chunkcachesize)

416

'power of 2') % self._chunkcachesize)

417

418

self._loadindex(v, mmapindexthreshold)

419

420

def _loadindex(self, v, mmapindexthreshold):

418

indexdata = ''

421

indexdata = ''

419

self._initempty = True

422

self._initempty = True

420

try:

423

try:

421

with self._indexfp() as f:

424

with self._indexfp() as f:

422

if (mmapindexthreshold is not None and

425

if (mmapindexthreshold is not None and

423

self.opener.fstat(f).st_size >= mmapindexthreshold):

426

self.opener.fstat(f).st_size >= mmapindexthreshold):

424

indexdata = util.buffer(util.mmapread(f))

427

indexdata = util.buffer(util.mmapread(f))

425

else:

428

else:

426

indexdata = f.read()

429

indexdata = f.read()

427

if len(indexdata) > 0:

430

if len(indexdata) > 0:

428

v = versionformat_unpack(indexdata[:4])[0]

431

v = versionformat_unpack(indexdata[:4])[0]

429

self._initempty = False

432

self._initempty = False

430

except IOError as inst:

433

except IOError as inst:

431

if inst.errno != errno.ENOENT:

434

if inst.errno != errno.ENOENT:

432

raise

435

raise

433

436

434

self.version = v

437

self.version = v

435

self._inline = v & FLAG_INLINE_DATA

438

self._inline = v & FLAG_INLINE_DATA

436

self._generaldelta = v & FLAG_GENERALDELTA

439

self._generaldelta = v & FLAG_GENERALDELTA

437

flags = v & ~0xFFFF

440

flags = v & ~0xFFFF

438

fmt = v & 0xFFFF

441

fmt = v & 0xFFFF

439

if fmt == REVLOGV0:

442

if fmt == REVLOGV0:

440

if flags:

443

if flags:

441

raise error.RevlogError(_('unknown flags (%#04x) in version %d '

444

raise error.RevlogError(_('unknown flags (%#04x) in version %d '

442

'revlog %s') %

445

'revlog %s') %

443

(flags >> 16, fmt, self.indexfile))

446

(flags >> 16, fmt, self.indexfile))

444

elif fmt == REVLOGV1:

447

elif fmt == REVLOGV1:

445

if flags & ~REVLOGV1_FLAGS:

448

if flags & ~REVLOGV1_FLAGS:

446

raise error.RevlogError(_('unknown flags (%#04x) in version %d '

449

raise error.RevlogError(_('unknown flags (%#04x) in version %d '

447

'revlog %s') %

450

'revlog %s') %

448

(flags >> 16, fmt, self.indexfile))

451

(flags >> 16, fmt, self.indexfile))

449

elif fmt == REVLOGV2:

452

elif fmt == REVLOGV2:

450

if flags & ~REVLOGV2_FLAGS:

453

if flags & ~REVLOGV2_FLAGS:

451

raise error.RevlogError(_('unknown flags (%#04x) in version %d '

454

raise error.RevlogError(_('unknown flags (%#04x) in version %d '

452

'revlog %s') %

455

'revlog %s') %

453

(flags >> 16, fmt, self.indexfile))

456

(flags >> 16, fmt, self.indexfile))

454

else:

457

else:

455

raise error.RevlogError(_('unknown version (%d) in revlog %s') %

458

raise error.RevlogError(_('unknown version (%d) in revlog %s') %

456

(fmt, self.indexfile))

459

(fmt, self.indexfile))

457

460

458

self._storedeltachains = True

461

self._storedeltachains = True

459

462

460

self._io = revlogio()

463

self._io = revlogio()

461

if self.version == REVLOGV0:

464

if self.version == REVLOGV0:

462

self._io = revlogoldio()

465

self._io = revlogoldio()

463

try:

466

try:

464

d = self._io.parseindex(indexdata, self._inline)

467

d = self._io.parseindex(indexdata, self._inline)

465

except (ValueError, IndexError):

468

except (ValueError, IndexError):

466

raise error.RevlogError(_("index %s is corrupted") %

469

raise error.RevlogError(_("index %s is corrupted") %

467

self.indexfile)

470

self.indexfile)

468

self.index, nodemap, self._chunkcache = d

471

self.index, nodemap, self._chunkcache = d

469

if nodemap is not None:

472

if nodemap is not None:

470

self.nodemap = self._nodecache = nodemap

473

self.nodemap = self._nodecache = nodemap

471

if not self._chunkcache:

474

if not self._chunkcache:

472

self._chunkclear()

475

self._chunkclear()

473

# revnum -> (chain-length, sum-delta-length)

476

# revnum -> (chain-length, sum-delta-length)

474

self._chaininfocache = {}

477

self._chaininfocache = {}

475

# revlog header -> revlog compressor

478

# revlog header -> revlog compressor

476

self._decompressors = {}

479

self._decompressors = {}

477

480

478

@util.propertycache

481

@util.propertycache

479

def _compressor(self):

482

def _compressor(self):

480

return util.compengines[self._compengine].revlogcompressor()

483

return util.compengines[self._compengine].revlogcompressor()

481

484

482

def _indexfp(self, mode='r'):

485

def _indexfp(self, mode='r'):

483

"""file object for the revlog's index file"""

486

"""file object for the revlog's index file"""

484

args = {r'mode': mode}

487

args = {r'mode': mode}

485

if mode != 'r':

488

if mode != 'r':

486

args[r'checkambig'] = self._checkambig

489

args[r'checkambig'] = self._checkambig

487

if mode == 'w':

490

if mode == 'w':

488

args[r'atomictemp'] = True

491

args[r'atomictemp'] = True

489

return self.opener(self.indexfile, **args)

492

return self.opener(self.indexfile, **args)

490

493

491

def _datafp(self, mode='r'):

494

def _datafp(self, mode='r'):

492

"""file object for the revlog's data file"""

495

"""file object for the revlog's data file"""

493

return self.opener(self.datafile, mode=mode)

496

return self.opener(self.datafile, mode=mode)

494

497

495

@contextlib.contextmanager

498

@contextlib.contextmanager

496

def _datareadfp(self, existingfp=None):

499

def _datareadfp(self, existingfp=None):

497

"""file object suitable to read data"""

500

"""file object suitable to read data"""

498

if existingfp is not None:

501

if existingfp is not None:

499

yield existingfp

502

yield existingfp

500

else:

503

else:

501

if self._inline:

504

if self._inline:

502

func = self._indexfp

505

func = self._indexfp

503

else:

506

else:

504

func = self._datafp

507

func = self._datafp

505

with func() as fp:

508

with func() as fp:

506

yield fp

509

yield fp

507

510

508

def tip(self):

511

def tip(self):

509

return self.node(len(self.index) - 1)

512

return self.node(len(self.index) - 1)

510

def __contains__(self, rev):

513

def __contains__(self, rev):

511

return 0 <= rev < len(self)

514

return 0 <= rev < len(self)

512

def __len__(self):

515

def __len__(self):

513

return len(self.index)

516

return len(self.index)

514

def __iter__(self):

517

def __iter__(self):

515

return iter(pycompat.xrange(len(self)))

518

return iter(pycompat.xrange(len(self)))

516

def revs(self, start=0, stop=None):

519

def revs(self, start=0, stop=None):

517

"""iterate over all rev in this revlog (from start to stop)"""

520

"""iterate over all rev in this revlog (from start to stop)"""

518

return storageutil.iterrevs(len(self), start=start, stop=stop)

521

return storageutil.iterrevs(len(self), start=start, stop=stop)

519

522

520

@util.propertycache

523

@util.propertycache

521

def nodemap(self):

524

def nodemap(self):

522

if self.index:

525

if self.index:

523

# populate mapping down to the initial node

526

# populate mapping down to the initial node

524

node0 = self.index[0][7] # get around changelog filtering

527

node0 = self.index[0][7] # get around changelog filtering

525

self.rev(node0)

528

self.rev(node0)

526

return self._nodecache

529

return self._nodecache

527

530

528

def hasnode(self, node):

531

def hasnode(self, node):

529

try:

532

try:

530

self.rev(node)

533

self.rev(node)

531

return True

534

return True

532

except KeyError:

535

except KeyError:

533

return False

536

return False

534

537

535

def candelta(self, baserev, rev):

538

def candelta(self, baserev, rev):

536

"""whether two revisions (baserev, rev) can be delta-ed or not"""

539

"""whether two revisions (baserev, rev) can be delta-ed or not"""

537

# Disable delta if either rev requires a content-changing flag

540

# Disable delta if either rev requires a content-changing flag

538

# processor (ex. LFS). This is because such flag processor can alter

541

# processor (ex. LFS). This is because such flag processor can alter

539

# the rawtext content that the delta will be based on, and two clients

542

# the rawtext content that the delta will be based on, and two clients

540

# could have a same revlog node with different flags (i.e. different

543

# could have a same revlog node with different flags (i.e. different

541

# rawtext contents) and the delta could be incompatible.

544

# rawtext contents) and the delta could be incompatible.

542

if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)

545

if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)

543

or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):

546

or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):

544

return False

547

return False

545

return True

548

return True

546

549

547

def clearcaches(self):

550

def clearcaches(self):

548

self._revisioncache = None

551

self._revisioncache = None

549

self._chainbasecache.clear()

552

self._chainbasecache.clear()

550

self._chunkcache = (0, '')

553

self._chunkcache = (0, '')

551

self._pcache = {}

554

self._pcache = {}

552

555

553

try:

556

try:

554

self._nodecache.clearcaches()

557

self._nodecache.clearcaches()

555

except AttributeError:

558

except AttributeError:

556

self._nodecache = {nullid: nullrev}

559

self._nodecache = {nullid: nullrev}

557

self._nodepos = None

560

self._nodepos = None

558

561

559

def rev(self, node):

562

def rev(self, node):

560

try:

563

try:

561

return self._nodecache[node]

564

return self._nodecache[node]

562

except TypeError:

565

except TypeError:

563

raise

566

raise

564

except error.RevlogError:

567

except error.RevlogError:

565

# parsers.c radix tree lookup failed

568

# parsers.c radix tree lookup failed

566

if node == wdirid or node in wdirfilenodeids:

569

if node == wdirid or node in wdirfilenodeids:

567

raise error.WdirUnsupported

570

raise error.WdirUnsupported

568

raise error.LookupError(node, self.indexfile, _('no node'))

571

raise error.LookupError(node, self.indexfile, _('no node'))

569

except KeyError:

572

except KeyError:

570

# pure python cache lookup failed

573

# pure python cache lookup failed

571

n = self._nodecache

574

n = self._nodecache

572

i = self.index

575

i = self.index

573

p = self._nodepos

576

p = self._nodepos

574

if p is None:

577

if p is None:

575

p = len(i) - 1

578

p = len(i) - 1

576

else:

579

else:

577

assert p < len(i)

580

assert p < len(i)

578

for r in pycompat.xrange(p, -1, -1):

581

for r in pycompat.xrange(p, -1, -1):

579

v = i[r][7]

582

v = i[r][7]

580

n[v] = r

583

n[v] = r

581

if v == node:

584

if v == node:

582

self._nodepos = r - 1

585

self._nodepos = r - 1

583

return r

586

return r

584

if node == wdirid or node in wdirfilenodeids:

587

if node == wdirid or node in wdirfilenodeids:

585

raise error.WdirUnsupported

588

raise error.WdirUnsupported

586

raise error.LookupError(node, self.indexfile, _('no node'))

589

raise error.LookupError(node, self.indexfile, _('no node'))

587

590

588

# Accessors for index entries.

591

# Accessors for index entries.

589

592

590

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

593

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

591

# are flags.

594

# are flags.

592

def start(self, rev):

595

def start(self, rev):

593

return int(self.index[rev][0] >> 16)

596

return int(self.index[rev][0] >> 16)

594

597

595

def flags(self, rev):

598

def flags(self, rev):

596

return self.index[rev][0] & 0xFFFF

599

return self.index[rev][0] & 0xFFFF

597

600

598

def length(self, rev):

601

def length(self, rev):

599

return self.index[rev][1]

602

return self.index[rev][1]

600

603

601

def rawsize(self, rev):

604

def rawsize(self, rev):

602

"""return the length of the uncompressed text for a given revision"""

605

"""return the length of the uncompressed text for a given revision"""

603

l = self.index[rev][2]

606

l = self.index[rev][2]

604

if l >= 0:

607

if l >= 0:

605

return l

608

return l

606

609

607

t = self.revision(rev, raw=True)

610

t = self.revision(rev, raw=True)

608

return len(t)

611

return len(t)

609

612

610

def size(self, rev):

613

def size(self, rev):

611

"""length of non-raw text (processed by a "read" flag processor)"""

614

"""length of non-raw text (processed by a "read" flag processor)"""

612

# fast path: if no "read" flag processor could change the content,

615

# fast path: if no "read" flag processor could change the content,

613

# size is rawsize. note: ELLIPSIS is known to not change the content.

616

# size is rawsize. note: ELLIPSIS is known to not change the content.

614

flags = self.flags(rev)

617

flags = self.flags(rev)

615

if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:

618

if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:

616

return self.rawsize(rev)

619

return self.rawsize(rev)

617

620

618

return len(self.revision(rev, raw=False))

621

return len(self.revision(rev, raw=False))

619

622

620

def chainbase(self, rev):

623

def chainbase(self, rev):

621

base = self._chainbasecache.get(rev)

624

base = self._chainbasecache.get(rev)

622

if base is not None:

625

if base is not None:

623

return base

626

return base

624

627

625

index = self.index

628

index = self.index

626

iterrev = rev

629

iterrev = rev

627

base = index[iterrev][3]

630

base = index[iterrev][3]

628

while base != iterrev:

631

while base != iterrev:

629

iterrev = base

632

iterrev = base

630

base = index[iterrev][3]

633

base = index[iterrev][3]

631

634

632

self._chainbasecache[rev] = base

635

self._chainbasecache[rev] = base

633

return base

636

return base

634

637

635

def linkrev(self, rev):

638

def linkrev(self, rev):

636

return self.index[rev][4]

639

return self.index[rev][4]

637

640

638

def parentrevs(self, rev):

641

def parentrevs(self, rev):

639

try:

642

try:

640

entry = self.index[rev]

643

entry = self.index[rev]

641

except IndexError:

644

except IndexError:

642

if rev == wdirrev:

645

if rev == wdirrev:

643

raise error.WdirUnsupported

646

raise error.WdirUnsupported

644

raise

647

raise

645

648

646

return entry[5], entry[6]

649

return entry[5], entry[6]

647

650

648

def node(self, rev):

651

def node(self, rev):

649

try:

652

try:

650

return self.index[rev][7]

653

return self.index[rev][7]

651

except IndexError:

654

except IndexError:

652

if rev == wdirrev:

655

if rev == wdirrev:

653

raise error.WdirUnsupported

656

raise error.WdirUnsupported

654

raise

657

raise

655

658

656

# Derived from index values.

659

# Derived from index values.

657

660

658

def end(self, rev):

661

def end(self, rev):

659

return self.start(rev) + self.length(rev)

662

return self.start(rev) + self.length(rev)

660

663

661

def parents(self, node):

664

def parents(self, node):

662

i = self.index

665

i = self.index

663

d = i[self.rev(node)]

666

d = i[self.rev(node)]

664

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

667

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

665

668

666

def chainlen(self, rev):

669

def chainlen(self, rev):

667

return self._chaininfo(rev)[0]

670

return self._chaininfo(rev)[0]

668

671

669

def _chaininfo(self, rev):

672

def _chaininfo(self, rev):

670

chaininfocache = self._chaininfocache

673

chaininfocache = self._chaininfocache

671

if rev in chaininfocache:

674

if rev in chaininfocache:

672

return chaininfocache[rev]

675

return chaininfocache[rev]

673

index = self.index

676

index = self.index

674

generaldelta = self._generaldelta

677

generaldelta = self._generaldelta

675

iterrev = rev

678

iterrev = rev

676

e = index[iterrev]

679

e = index[iterrev]

677

clen = 0

680

clen = 0

678

compresseddeltalen = 0

681

compresseddeltalen = 0

679

while iterrev != e[3]:

682

while iterrev != e[3]:

680

clen += 1

683

clen += 1

681

compresseddeltalen += e[1]

684

compresseddeltalen += e[1]

682

if generaldelta:

685

if generaldelta:

683

iterrev = e[3]

686

iterrev = e[3]

684

else:

687

else:

685

iterrev -= 1

688

iterrev -= 1

686

if iterrev in chaininfocache:

689

if iterrev in chaininfocache:

687

t = chaininfocache[iterrev]

690

t = chaininfocache[iterrev]

688

clen += t[0]

691

clen += t[0]

689

compresseddeltalen += t[1]

692

compresseddeltalen += t[1]

690

break

693

break

691

e = index[iterrev]

694

e = index[iterrev]

692

else:

695

else:

693

# Add text length of base since decompressing that also takes

696

# Add text length of base since decompressing that also takes

694

# work. For cache hits the length is already included.

697

# work. For cache hits the length is already included.

695

compresseddeltalen += e[1]

698

compresseddeltalen += e[1]

696

r = (clen, compresseddeltalen)

699

r = (clen, compresseddeltalen)

697

chaininfocache[rev] = r

700

chaininfocache[rev] = r

698

return r

701

return r

699

702

700

def _deltachain(self, rev, stoprev=None):

703

def _deltachain(self, rev, stoprev=None):

701

"""Obtain the delta chain for a revision.

704

"""Obtain the delta chain for a revision.

702

705

703

``stoprev`` specifies a revision to stop at. If not specified, we

706

``stoprev`` specifies a revision to stop at. If not specified, we

704

stop at the base of the chain.

707

stop at the base of the chain.

705

708

706

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

709

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

707

revs in ascending order and ``stopped`` is a bool indicating whether

710

revs in ascending order and ``stopped`` is a bool indicating whether

708

``stoprev`` was hit.

711

``stoprev`` was hit.

709

"""

712

"""

710

# Try C implementation.

713

# Try C implementation.

711

try:

714

try:

712

return self.index.deltachain(rev, stoprev, self._generaldelta)

715

return self.index.deltachain(rev, stoprev, self._generaldelta)

713

except AttributeError:

716

except AttributeError:

714

pass

717

pass

715

718

716

chain = []

719

chain = []

717

720

718

# Alias to prevent attribute lookup in tight loop.

721

# Alias to prevent attribute lookup in tight loop.

719

index = self.index

722

index = self.index

720

generaldelta = self._generaldelta

723

generaldelta = self._generaldelta

721

724

722

iterrev = rev

725

iterrev = rev

723

e = index[iterrev]

726

e = index[iterrev]

724

while iterrev != e[3] and iterrev != stoprev:

727

while iterrev != e[3] and iterrev != stoprev:

725

chain.append(iterrev)

728

chain.append(iterrev)

726

if generaldelta:

729

if generaldelta:

727

iterrev = e[3]

730

iterrev = e[3]

728

else:

731

else:

729

iterrev -= 1

732

iterrev -= 1

730

e = index[iterrev]

733

e = index[iterrev]

731

734

732

if iterrev == stoprev:

735

if iterrev == stoprev:

733

stopped = True

736

stopped = True

734

else:

737

else:

735

chain.append(iterrev)

738

chain.append(iterrev)

736

stopped = False

739

stopped = False

737

740

738

chain.reverse()

741

chain.reverse()

739

return chain, stopped

742

return chain, stopped

740

743

741

def ancestors(self, revs, stoprev=0, inclusive=False):

744

def ancestors(self, revs, stoprev=0, inclusive=False):

742

"""Generate the ancestors of 'revs' in reverse topological order.

745

"""Generate the ancestors of 'revs' in reverse topological order.

743

Does not generate revs lower than stoprev.

746

Does not generate revs lower than stoprev.

744

747

745

See the documentation for ancestor.lazyancestors for more details."""

748

See the documentation for ancestor.lazyancestors for more details."""

746

749

747

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

750

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

748

inclusive=inclusive)

751

inclusive=inclusive)

749

752

750

def descendants(self, revs):

753

def descendants(self, revs):

751

return dagop.descendantrevs(revs, self.revs, self.parentrevs)

754

return dagop.descendantrevs(revs, self.revs, self.parentrevs)

752

755

753

def findcommonmissing(self, common=None, heads=None):

756

def findcommonmissing(self, common=None, heads=None):

754

"""Return a tuple of the ancestors of common and the ancestors of heads

757

"""Return a tuple of the ancestors of common and the ancestors of heads

755

that are not ancestors of common. In revset terminology, we return the

758

that are not ancestors of common. In revset terminology, we return the

756

tuple:

759

tuple:

757

760

758

::common, (::heads) - (::common)

761

::common, (::heads) - (::common)

759

762

760

The list is sorted by revision number, meaning it is

763

The list is sorted by revision number, meaning it is

761

topologically sorted.

764

topologically sorted.

762

765

763

'heads' and 'common' are both lists of node IDs. If heads is

766

'heads' and 'common' are both lists of node IDs. If heads is

764

not supplied, uses all of the revlog's heads. If common is not

767

not supplied, uses all of the revlog's heads. If common is not

765

supplied, uses nullid."""

768

supplied, uses nullid."""

766

if common is None:

769

if common is None:

767

common = [nullid]

770

common = [nullid]

768

if heads is None:

771

if heads is None:

769

heads = self.heads()

772

heads = self.heads()

770

773

771

common = [self.rev(n) for n in common]

774

common = [self.rev(n) for n in common]

772

heads = [self.rev(n) for n in heads]

775

heads = [self.rev(n) for n in heads]

773

776

774

# we want the ancestors, but inclusive

777

# we want the ancestors, but inclusive

775

class lazyset(object):

778

class lazyset(object):

776

def __init__(self, lazyvalues):

779

def __init__(self, lazyvalues):

777

self.addedvalues = set()

780

self.addedvalues = set()

778

self.lazyvalues = lazyvalues

781

self.lazyvalues = lazyvalues

779

782

780

def __contains__(self, value):

783

def __contains__(self, value):

781

return value in self.addedvalues or value in self.lazyvalues

784

return value in self.addedvalues or value in self.lazyvalues

782

785

783

def __iter__(self):

786

def __iter__(self):

784

added = self.addedvalues

787

added = self.addedvalues

785

for r in added:

788

for r in added:

786

yield r

789

yield r

787

for r in self.lazyvalues:

790

for r in self.lazyvalues:

788

if not r in added:

791

if not r in added:

789

yield r

792

yield r

790

793

791

def add(self, value):

794

def add(self, value):

792

self.addedvalues.add(value)

795

self.addedvalues.add(value)

793

796

794

def update(self, values):

797

def update(self, values):

795

self.addedvalues.update(values)

798

self.addedvalues.update(values)

796

799

797

has = lazyset(self.ancestors(common))

800

has = lazyset(self.ancestors(common))

798

has.add(nullrev)

801

has.add(nullrev)

799

has.update(common)

802

has.update(common)

800

803

801

# take all ancestors from heads that aren't in has

804

# take all ancestors from heads that aren't in has

802

missing = set()

805

missing = set()

803

visit = collections.deque(r for r in heads if r not in has)

806

visit = collections.deque(r for r in heads if r not in has)

804

while visit:

807

while visit:

805

r = visit.popleft()

808

r = visit.popleft()

806

if r in missing:

809

if r in missing:

807

continue

810

continue

808

else:

811

else:

809

missing.add(r)

812

missing.add(r)

810

for p in self.parentrevs(r):

813

for p in self.parentrevs(r):

811

if p not in has:

814

if p not in has:

812

visit.append(p)

815

visit.append(p)

813

missing = list(missing)

816

missing = list(missing)

814

missing.sort()

817

missing.sort()

815

return has, [self.node(miss) for miss in missing]

818

return has, [self.node(miss) for miss in missing]

816

819

817

def incrementalmissingrevs(self, common=None):

820

def incrementalmissingrevs(self, common=None):

818

"""Return an object that can be used to incrementally compute the

821

"""Return an object that can be used to incrementally compute the

819

revision numbers of the ancestors of arbitrary sets that are not

822

revision numbers of the ancestors of arbitrary sets that are not

820

ancestors of common. This is an ancestor.incrementalmissingancestors

823

ancestors of common. This is an ancestor.incrementalmissingancestors

821

object.

824

object.

822

825

823

'common' is a list of revision numbers. If common is not supplied, uses

826

'common' is a list of revision numbers. If common is not supplied, uses

824

nullrev.

827

nullrev.

825

"""

828

"""

826

if common is None:

829

if common is None:

827

common = [nullrev]

830

common = [nullrev]

828

831

829

return ancestor.incrementalmissingancestors(self.parentrevs, common)

832

return ancestor.incrementalmissingancestors(self.parentrevs, common)

830

833

831

def findmissingrevs(self, common=None, heads=None):

834

def findmissingrevs(self, common=None, heads=None):

832

"""Return the revision numbers of the ancestors of heads that

835

"""Return the revision numbers of the ancestors of heads that

833

are not ancestors of common.

836

are not ancestors of common.

834

837

835

More specifically, return a list of revision numbers corresponding to

838

More specifically, return a list of revision numbers corresponding to

836

nodes N such that every N satisfies the following constraints:

839

nodes N such that every N satisfies the following constraints:

837

840

838

1. N is an ancestor of some node in 'heads'

841

1. N is an ancestor of some node in 'heads'

839

2. N is not an ancestor of any node in 'common'

842

2. N is not an ancestor of any node in 'common'

840

843

841

The list is sorted by revision number, meaning it is

844

The list is sorted by revision number, meaning it is

842

topologically sorted.

845

topologically sorted.

843

846

844

'heads' and 'common' are both lists of revision numbers. If heads is

847

'heads' and 'common' are both lists of revision numbers. If heads is

845

not supplied, uses all of the revlog's heads. If common is not

848

not supplied, uses all of the revlog's heads. If common is not

846

supplied, uses nullid."""

849

supplied, uses nullid."""

847

if common is None:

850

if common is None:

848

common = [nullrev]

851

common = [nullrev]

849

if heads is None:

852

if heads is None:

850

heads = self.headrevs()

853

heads = self.headrevs()

851

854

852

inc = self.incrementalmissingrevs(common=common)

855

inc = self.incrementalmissingrevs(common=common)

853

return inc.missingancestors(heads)

856

return inc.missingancestors(heads)

854

857

855

def findmissing(self, common=None, heads=None):

858

def findmissing(self, common=None, heads=None):

856

"""Return the ancestors of heads that are not ancestors of common.

859

"""Return the ancestors of heads that are not ancestors of common.

857

860

858

More specifically, return a list of nodes N such that every N

861

More specifically, return a list of nodes N such that every N

859

satisfies the following constraints:

862

satisfies the following constraints:

860

863

861

1. N is an ancestor of some node in 'heads'

864

1. N is an ancestor of some node in 'heads'

862

2. N is not an ancestor of any node in 'common'

865

2. N is not an ancestor of any node in 'common'

863

866

864

The list is sorted by revision number, meaning it is

867

The list is sorted by revision number, meaning it is

865

topologically sorted.

868

topologically sorted.

866

869

867

'heads' and 'common' are both lists of node IDs. If heads is

870

'heads' and 'common' are both lists of node IDs. If heads is

868

not supplied, uses all of the revlog's heads. If common is not

871

not supplied, uses all of the revlog's heads. If common is not

869

supplied, uses nullid."""

872

supplied, uses nullid."""

870

if common is None:

873

if common is None:

871

common = [nullid]

874

common = [nullid]

872

if heads is None:

875

if heads is None:

873

heads = self.heads()

876

heads = self.heads()

874

877

875

common = [self.rev(n) for n in common]

878

common = [self.rev(n) for n in common]

876

heads = [self.rev(n) for n in heads]

879

heads = [self.rev(n) for n in heads]

877

880

878

inc = self.incrementalmissingrevs(common=common)

881

inc = self.incrementalmissingrevs(common=common)

879

return [self.node(r) for r in inc.missingancestors(heads)]

882

return [self.node(r) for r in inc.missingancestors(heads)]

880

883

881

def nodesbetween(self, roots=None, heads=None):

884

def nodesbetween(self, roots=None, heads=None):

882

"""Return a topological path from 'roots' to 'heads'.

885

"""Return a topological path from 'roots' to 'heads'.

883

886

884

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

887

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

885

topologically sorted list of all nodes N that satisfy both of

888

topologically sorted list of all nodes N that satisfy both of

886

these constraints:

889

these constraints:

887

890

888

1. N is a descendant of some node in 'roots'

891

1. N is a descendant of some node in 'roots'

889

2. N is an ancestor of some node in 'heads'

892

2. N is an ancestor of some node in 'heads'

890

893

891

Every node is considered to be both a descendant and an ancestor

894

Every node is considered to be both a descendant and an ancestor

892

of itself, so every reachable node in 'roots' and 'heads' will be

895

of itself, so every reachable node in 'roots' and 'heads' will be

893

included in 'nodes'.

896

included in 'nodes'.

894

897

895

'outroots' is the list of reachable nodes in 'roots', i.e., the

898

'outroots' is the list of reachable nodes in 'roots', i.e., the

896

subset of 'roots' that is returned in 'nodes'. Likewise,

899

subset of 'roots' that is returned in 'nodes'. Likewise,

897

'outheads' is the subset of 'heads' that is also in 'nodes'.

900

'outheads' is the subset of 'heads' that is also in 'nodes'.

898

901

899

'roots' and 'heads' are both lists of node IDs. If 'roots' is

902

'roots' and 'heads' are both lists of node IDs. If 'roots' is

900

unspecified, uses nullid as the only root. If 'heads' is

903

unspecified, uses nullid as the only root. If 'heads' is

901

unspecified, uses list of all of the revlog's heads."""

904

unspecified, uses list of all of the revlog's heads."""

902

nonodes = ([], [], [])

905

nonodes = ([], [], [])

903

if roots is not None:

906

if roots is not None:

904

roots = list(roots)

907

roots = list(roots)

905

if not roots:

908

if not roots:

906

return nonodes

909

return nonodes

907

lowestrev = min([self.rev(n) for n in roots])

910

lowestrev = min([self.rev(n) for n in roots])

908

else:

911

else:

909

roots = [nullid] # Everybody's a descendant of nullid

912

roots = [nullid] # Everybody's a descendant of nullid

910

lowestrev = nullrev

913

lowestrev = nullrev

911

if (lowestrev == nullrev) and (heads is None):

914

if (lowestrev == nullrev) and (heads is None):

912

# We want _all_ the nodes!

915

# We want _all_ the nodes!

913

return ([self.node(r) for r in self], [nullid], list(self.heads()))

916

return ([self.node(r) for r in self], [nullid], list(self.heads()))

914

if heads is None:

917

if heads is None:

915

# All nodes are ancestors, so the latest ancestor is the last

918

# All nodes are ancestors, so the latest ancestor is the last

916

# node.

919

# node.

917

highestrev = len(self) - 1

920

highestrev = len(self) - 1

918

# Set ancestors to None to signal that every node is an ancestor.

921

# Set ancestors to None to signal that every node is an ancestor.

919

ancestors = None

922

ancestors = None

920

# Set heads to an empty dictionary for later discovery of heads

923

# Set heads to an empty dictionary for later discovery of heads

921

heads = {}

924

heads = {}

922

else:

925

else:

923

heads = list(heads)

926

heads = list(heads)

924

if not heads:

927

if not heads:

925

return nonodes

928

return nonodes

926

ancestors = set()

929

ancestors = set()

927

# Turn heads into a dictionary so we can remove 'fake' heads.

930

# Turn heads into a dictionary so we can remove 'fake' heads.

928

# Also, later we will be using it to filter out the heads we can't

931

# Also, later we will be using it to filter out the heads we can't

929

# find from roots.

932

# find from roots.

930

heads = dict.fromkeys(heads, False)

933

heads = dict.fromkeys(heads, False)

931

# Start at the top and keep marking parents until we're done.

934

# Start at the top and keep marking parents until we're done.

932

nodestotag = set(heads)

935

nodestotag = set(heads)

933

# Remember where the top was so we can use it as a limit later.

936

# Remember where the top was so we can use it as a limit later.

934

highestrev = max([self.rev(n) for n in nodestotag])

937

highestrev = max([self.rev(n) for n in nodestotag])

935

while nodestotag:

938

while nodestotag:

936

# grab a node to tag

939

# grab a node to tag

937

n = nodestotag.pop()

940

n = nodestotag.pop()

938

# Never tag nullid

941

# Never tag nullid

939

if n == nullid:

942

if n == nullid:

940

continue

943

continue

941

# A node's revision number represents its place in a

944

# A node's revision number represents its place in a

942

# topologically sorted list of nodes.

945

# topologically sorted list of nodes.

943

r = self.rev(n)

946

r = self.rev(n)

944

if r >= lowestrev:

947

if r >= lowestrev:

945

if n not in ancestors:

948

if n not in ancestors:

946

# If we are possibly a descendant of one of the roots

949

# If we are possibly a descendant of one of the roots

947

# and we haven't already been marked as an ancestor

950

# and we haven't already been marked as an ancestor

948

ancestors.add(n) # Mark as ancestor

951

ancestors.add(n) # Mark as ancestor

949

# Add non-nullid parents to list of nodes to tag.

952

# Add non-nullid parents to list of nodes to tag.

950

nodestotag.update([p for p in self.parents(n) if

953

nodestotag.update([p for p in self.parents(n) if

951

p != nullid])

954

p != nullid])

952

elif n in heads: # We've seen it before, is it a fake head?

955

elif n in heads: # We've seen it before, is it a fake head?

953

# So it is, real heads should not be the ancestors of

956

# So it is, real heads should not be the ancestors of

954

# any other heads.

957

# any other heads.

955

heads.pop(n)

958

heads.pop(n)

956

if not ancestors:

959

if not ancestors:

957

return nonodes

960

return nonodes

958

# Now that we have our set of ancestors, we want to remove any

961

# Now that we have our set of ancestors, we want to remove any

959

# roots that are not ancestors.

962

# roots that are not ancestors.

960

963

961

# If one of the roots was nullid, everything is included anyway.

964

# If one of the roots was nullid, everything is included anyway.

962

if lowestrev > nullrev:

965

if lowestrev > nullrev:

963

# But, since we weren't, let's recompute the lowest rev to not

966

# But, since we weren't, let's recompute the lowest rev to not

964

# include roots that aren't ancestors.

967

# include roots that aren't ancestors.

965

968

966

# Filter out roots that aren't ancestors of heads

969

# Filter out roots that aren't ancestors of heads

967

roots = [root for root in roots if root in ancestors]

970

roots = [root for root in roots if root in ancestors]

968

# Recompute the lowest revision

971

# Recompute the lowest revision

969

if roots:

972

if roots:

970

lowestrev = min([self.rev(root) for root in roots])

973

lowestrev = min([self.rev(root) for root in roots])

971

else:

974

else:

972

# No more roots? Return empty list

975

# No more roots? Return empty list

973

return nonodes

976

return nonodes

974

else:

977

else:

975

# We are descending from nullid, and don't need to care about

978

# We are descending from nullid, and don't need to care about

976

# any other roots.

979

# any other roots.

977

lowestrev = nullrev

980

lowestrev = nullrev

978

roots = [nullid]

981

roots = [nullid]

979

# Transform our roots list into a set.

982

# Transform our roots list into a set.

980

descendants = set(roots)

983

descendants = set(roots)

981

# Also, keep the original roots so we can filter out roots that aren't

984

# Also, keep the original roots so we can filter out roots that aren't

982

# 'real' roots (i.e. are descended from other roots).

985

# 'real' roots (i.e. are descended from other roots).

983

roots = descendants.copy()

986

roots = descendants.copy()

984

# Our topologically sorted list of output nodes.

987

# Our topologically sorted list of output nodes.

985

orderedout = []

988

orderedout = []

986

# Don't start at nullid since we don't want nullid in our output list,

989

# Don't start at nullid since we don't want nullid in our output list,

987

# and if nullid shows up in descendants, empty parents will look like

990

# and if nullid shows up in descendants, empty parents will look like

988

# they're descendants.

991

# they're descendants.

989

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

992

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

990

n = self.node(r)

993

n = self.node(r)

991

isdescendant = False

994

isdescendant = False

992

if lowestrev == nullrev: # Everybody is a descendant of nullid

995

if lowestrev == nullrev: # Everybody is a descendant of nullid

993

isdescendant = True

996

isdescendant = True

994

elif n in descendants:

997

elif n in descendants:

995

# n is already a descendant

998

# n is already a descendant

996

isdescendant = True

999

isdescendant = True

997

# This check only needs to be done here because all the roots

1000

# This check only needs to be done here because all the roots

998

# will start being marked is descendants before the loop.

1001

# will start being marked is descendants before the loop.

999

if n in roots:

1002

if n in roots:

1000

# If n was a root, check if it's a 'real' root.

1003

# If n was a root, check if it's a 'real' root.

1001

p = tuple(self.parents(n))

1004

p = tuple(self.parents(n))

1002

# If any of its parents are descendants, it's not a root.

1005

# If any of its parents are descendants, it's not a root.

1003

if (p[0] in descendants) or (p[1] in descendants):

1006

if (p[0] in descendants) or (p[1] in descendants):

1004

roots.remove(n)

1007

roots.remove(n)

1005

else:

1008

else:

1006

p = tuple(self.parents(n))

1009

p = tuple(self.parents(n))

1007

# A node is a descendant if either of its parents are

1010

# A node is a descendant if either of its parents are

1008

# descendants. (We seeded the dependents list with the roots

1011

# descendants. (We seeded the dependents list with the roots

1009

# up there, remember?)

1012

# up there, remember?)

1010

if (p[0] in descendants) or (p[1] in descendants):

1013

if (p[0] in descendants) or (p[1] in descendants):

1011

descendants.add(n)

1014

descendants.add(n)

1012

isdescendant = True

1015

isdescendant = True

1013

if isdescendant and ((ancestors is None) or (n in ancestors)):

1016

if isdescendant and ((ancestors is None) or (n in ancestors)):

1014

# Only include nodes that are both descendants and ancestors.

1017

# Only include nodes that are both descendants and ancestors.

1015

orderedout.append(n)

1018

orderedout.append(n)

1016

if (ancestors is not None) and (n in heads):

1019

if (ancestors is not None) and (n in heads):

1017

# We're trying to figure out which heads are reachable

1020

# We're trying to figure out which heads are reachable

1018

# from roots.

1021

# from roots.

1019

# Mark this head as having been reached

1022

# Mark this head as having been reached

1020

heads[n] = True

1023

heads[n] = True

1021

elif ancestors is None:

1024

elif ancestors is None:

1022

# Otherwise, we're trying to discover the heads.

1025

# Otherwise, we're trying to discover the heads.

1023

# Assume this is a head because if it isn't, the next step

1026

# Assume this is a head because if it isn't, the next step

1024

# will eventually remove it.

1027

# will eventually remove it.

1025

heads[n] = True

1028

heads[n] = True

1026

# But, obviously its parents aren't.

1029

# But, obviously its parents aren't.

1027

for p in self.parents(n):

1030

for p in self.parents(n):

1028

heads.pop(p, None)

1031

heads.pop(p, None)

1029

heads = [head for head, flag in heads.iteritems() if flag]

1032

heads = [head for head, flag in heads.iteritems() if flag]

1030

roots = list(roots)

1033

roots = list(roots)

1031

assert orderedout

1034

assert orderedout

1032

assert roots

1035

assert roots

1033

assert heads

1036

assert heads

1034

return (orderedout, roots, heads)

1037

return (orderedout, roots, heads)

1035

1038

1036

def headrevs(self):

1039

def headrevs(self):

1037

try:

1040

try:

1038

return self.index.headrevs()

1041

return self.index.headrevs()

1039

except AttributeError:

1042

except AttributeError:

1040

return self._headrevs()

1043

return self._headrevs()

1041

1044

1042

def computephases(self, roots):

1045

def computephases(self, roots):

1043

return self.index.computephasesmapsets(roots)

1046

return self.index.computephasesmapsets(roots)

1044

1047

1045

def _headrevs(self):

1048

def _headrevs(self):

1046

count = len(self)

1049

count = len(self)

1047

if not count:

1050

if not count:

1048

return [nullrev]

1051

return [nullrev]

1049

# we won't iter over filtered rev so nobody is a head at start

1052

# we won't iter over filtered rev so nobody is a head at start

1050

ishead = [0] * (count + 1)

1053

ishead = [0] * (count + 1)

1051

index = self.index

1054

index = self.index

1052

for r in self:

1055

for r in self:

1053

ishead[r] = 1 # I may be an head

1056

ishead[r] = 1 # I may be an head

1054

e = index[r]

1057

e = index[r]

1055

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1058

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1056

return [r for r, val in enumerate(ishead) if val]

1059

return [r for r, val in enumerate(ishead) if val]

1057

1060

1058

def heads(self, start=None, stop=None):

1061

def heads(self, start=None, stop=None):

1059

"""return the list of all nodes that have no children

1062

"""return the list of all nodes that have no children

1060

1063

1061

if start is specified, only heads that are descendants of

1064

if start is specified, only heads that are descendants of

1062

start will be returned

1065

start will be returned

1063

if stop is specified, it will consider all the revs from stop

1066

if stop is specified, it will consider all the revs from stop

1064

as if they had no children

1067

as if they had no children

1065

"""

1068

"""

1066

if start is None and stop is None:

1069

if start is None and stop is None:

1067

if not len(self):

1070

if not len(self):

1068

return [nullid]

1071

return [nullid]

1069

return [self.node(r) for r in self.headrevs()]

1072

return [self.node(r) for r in self.headrevs()]

1070

1073

1071

if start is None:

1074

if start is None:

1072

start = nullrev

1075

start = nullrev

1073

else:

1076

else:

1074

start = self.rev(start)

1077

start = self.rev(start)

1075

1078

1076

stoprevs = set(self.rev(n) for n in stop or [])

1079

stoprevs = set(self.rev(n) for n in stop or [])

1077

1080

1078

revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,

1081

revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,

1079

stoprevs=stoprevs)

1082

stoprevs=stoprevs)

1080

1083

1081

return [self.node(rev) for rev in revs]

1084

return [self.node(rev) for rev in revs]

1082

1085

1083

def children(self, node):

1086

def children(self, node):

1084

"""find the children of a given node"""

1087

"""find the children of a given node"""

1085

c = []

1088

c = []

1086

p = self.rev(node)

1089

p = self.rev(node)

1087

for r in self.revs(start=p + 1):

1090

for r in self.revs(start=p + 1):

1088

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

1091

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

1089

if prevs:

1092

if prevs:

1090

for pr in prevs:

1093

for pr in prevs:

1091

if pr == p:

1094

if pr == p:

1092

c.append(self.node(r))

1095

c.append(self.node(r))

1093

elif p == nullrev:

1096

elif p == nullrev:

1094

c.append(self.node(r))

1097

c.append(self.node(r))

1095

return c

1098

return c

1096

1099

1097

def commonancestorsheads(self, a, b):

1100

def commonancestorsheads(self, a, b):

1098

"""calculate all the heads of the common ancestors of nodes a and b"""

1101

"""calculate all the heads of the common ancestors of nodes a and b"""

1099

a, b = self.rev(a), self.rev(b)

1102

a, b = self.rev(a), self.rev(b)

1100

ancs = self._commonancestorsheads(a, b)

1103

ancs = self._commonancestorsheads(a, b)

1101

return pycompat.maplist(self.node, ancs)

1104

return pycompat.maplist(self.node, ancs)

1102

1105

1103

def _commonancestorsheads(self, *revs):

1106

def _commonancestorsheads(self, *revs):

1104

"""calculate all the heads of the common ancestors of revs"""

1107

"""calculate all the heads of the common ancestors of revs"""

1105

try:

1108

try:

1106

ancs = self.index.commonancestorsheads(*revs)

1109

ancs = self.index.commonancestorsheads(*revs)

1107

except (AttributeError, OverflowError): # C implementation failed

1110

except (AttributeError, OverflowError): # C implementation failed

1108

ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)

1111

ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)

1109

return ancs

1112

return ancs

1110

1113

1111

def isancestor(self, a, b):

1114

def isancestor(self, a, b):

1112

"""return True if node a is an ancestor of node b

1115

"""return True if node a is an ancestor of node b

1113

1116

1114

A revision is considered an ancestor of itself."""

1117

A revision is considered an ancestor of itself."""

1115

a, b = self.rev(a), self.rev(b)

1118

a, b = self.rev(a), self.rev(b)

1116

return self.isancestorrev(a, b)

1119

return self.isancestorrev(a, b)

1117

1120

1118

def isancestorrev(self, a, b):

1121

def isancestorrev(self, a, b):

1119

"""return True if revision a is an ancestor of revision b

1122

"""return True if revision a is an ancestor of revision b

1120

1123

1121

A revision is considered an ancestor of itself.

1124

A revision is considered an ancestor of itself.

1122

1125

1123

The implementation of this is trivial but the use of

1126

The implementation of this is trivial but the use of

1124

commonancestorsheads is not."""

1127

commonancestorsheads is not."""

1125

if a == nullrev:

1128

if a == nullrev:

1126

return True

1129

return True

1127

elif a == b:

1130

elif a == b:

1128

return True

1131

return True

1129

elif a > b:

1132

elif a > b:

1130

return False

1133

return False

1131

return a in self._commonancestorsheads(a, b)

1134

return a in self._commonancestorsheads(a, b)

1132

1135

1133

def ancestor(self, a, b):

1136

def ancestor(self, a, b):

1134

"""calculate the "best" common ancestor of nodes a and b"""

1137

"""calculate the "best" common ancestor of nodes a and b"""

1135

1138

1136

a, b = self.rev(a), self.rev(b)

1139

a, b = self.rev(a), self.rev(b)

1137

try:

1140

try:

1138

ancs = self.index.ancestors(a, b)

1141

ancs = self.index.ancestors(a, b)

1139

except (AttributeError, OverflowError):

1142

except (AttributeError, OverflowError):

1140

ancs = ancestor.ancestors(self.parentrevs, a, b)

1143

ancs = ancestor.ancestors(self.parentrevs, a, b)

1141

if ancs:

1144

if ancs:

1142

# choose a consistent winner when there's a tie

1145

# choose a consistent winner when there's a tie

1143

return min(map(self.node, ancs))

1146

return min(map(self.node, ancs))

1144

return nullid

1147

return nullid

1145

1148

1146

def _match(self, id):

1149

def _match(self, id):

1147

if isinstance(id, int):

1150

if isinstance(id, int):

1148

# rev

1151

# rev

1149

return self.node(id)

1152

return self.node(id)

1150

if len(id) == 20:

1153

if len(id) == 20:

1151

# possibly a binary node

1154

# possibly a binary node

1152

# odds of a binary node being all hex in ASCII are 1 in 10**25

1155

# odds of a binary node being all hex in ASCII are 1 in 10**25

1153

try:

1156

try:

1154

node = id

1157

node = id

1155

self.rev(node) # quick search the index

1158

self.rev(node) # quick search the index

1156

return node

1159

return node

1157

except error.LookupError:

1160

except error.LookupError:

1158

pass # may be partial hex id

1161

pass # may be partial hex id

1159

try:

1162

try:

1160

# str(rev)

1163

# str(rev)

1161

rev = int(id)

1164

rev = int(id)

1162

if "%d" % rev != id:

1165

if "%d" % rev != id:

1163

raise ValueError

1166

raise ValueError

1164

if rev < 0:

1167

if rev < 0:

1165

rev = len(self) + rev

1168

rev = len(self) + rev

1166

if rev < 0 or rev >= len(self):

1169

if rev < 0 or rev >= len(self):

1167

raise ValueError

1170

raise ValueError

1168

return self.node(rev)

1171

return self.node(rev)

1169

except (ValueError, OverflowError):

1172

except (ValueError, OverflowError):

1170

pass

1173

pass

1171

if len(id) == 40:

1174

if len(id) == 40:

1172

try:

1175

try:

1173

# a full hex nodeid?

1176

# a full hex nodeid?

1174

node = bin(id)

1177

node = bin(id)

1175

self.rev(node)

1178

self.rev(node)

1176

return node

1179

return node

1177

except (TypeError, error.LookupError):

1180

except (TypeError, error.LookupError):

1178

pass

1181

pass

1179

1182

1180

def _partialmatch(self, id):

1183

def _partialmatch(self, id):

1181

# we don't care wdirfilenodeids as they should be always full hash

1184

# we don't care wdirfilenodeids as they should be always full hash

1182

maybewdir = wdirhex.startswith(id)

1185

maybewdir = wdirhex.startswith(id)

1183

try:

1186

try:

1184

partial = self.index.partialmatch(id)

1187

partial = self.index.partialmatch(id)

1185

if partial and self.hasnode(partial):

1188

if partial and self.hasnode(partial):

1186

if maybewdir:

1189

if maybewdir:

1187

# single 'ff...' match in radix tree, ambiguous with wdir

1190

# single 'ff...' match in radix tree, ambiguous with wdir

1188

raise error.RevlogError

1191

raise error.RevlogError

1189

return partial

1192

return partial

1190

if maybewdir:

1193

if maybewdir:

1191

# no 'ff...' match in radix tree, wdir identified

1194

# no 'ff...' match in radix tree, wdir identified

1192

raise error.WdirUnsupported

1195

raise error.WdirUnsupported

1193

return None

1196

return None

1194

except error.RevlogError:

1197

except error.RevlogError:

1195

# parsers.c radix tree lookup gave multiple matches

1198

# parsers.c radix tree lookup gave multiple matches

1196

# fast path: for unfiltered changelog, radix tree is accurate

1199

# fast path: for unfiltered changelog, radix tree is accurate

1197

if not getattr(self, 'filteredrevs', None):

1200

if not getattr(self, 'filteredrevs', None):

1198

raise error.AmbiguousPrefixLookupError(

1201

raise error.AmbiguousPrefixLookupError(

1199

id, self.indexfile, _('ambiguous identifier'))

1202

id, self.indexfile, _('ambiguous identifier'))

1200

# fall through to slow path that filters hidden revisions

1203

# fall through to slow path that filters hidden revisions

1201

except (AttributeError, ValueError):

1204

except (AttributeError, ValueError):

1202

# we are pure python, or key was too short to search radix tree

1205

# we are pure python, or key was too short to search radix tree

1203

pass

1206

pass

1204

1207

1205

if id in self._pcache:

1208

if id in self._pcache:

1206

return self._pcache[id]

1209

return self._pcache[id]

1207

1210

1208

if len(id) <= 40:

1211

if len(id) <= 40:

1209

try:

1212

try:

1210

# hex(node)[:...]

1213

# hex(node)[:...]

1211

l = len(id) // 2 # grab an even number of digits

1214

l = len(id) // 2 # grab an even number of digits

1212

prefix = bin(id[:l * 2])

1215

prefix = bin(id[:l * 2])

1213

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1216

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1214

nl = [n for n in nl if hex(n).startswith(id) and

1217

nl = [n for n in nl if hex(n).startswith(id) and

1215

self.hasnode(n)]

1218

self.hasnode(n)]

1216

if nullhex.startswith(id):

1219

if nullhex.startswith(id):

1217

nl.append(nullid)

1220

nl.append(nullid)

1218

if len(nl) > 0:

1221

if len(nl) > 0:

1219

if len(nl) == 1 and not maybewdir:

1222

if len(nl) == 1 and not maybewdir:

1220

self._pcache[id] = nl[0]

1223

self._pcache[id] = nl[0]

1221

return nl[0]

1224

return nl[0]

1222

raise error.AmbiguousPrefixLookupError(

1225

raise error.AmbiguousPrefixLookupError(

1223

id, self.indexfile, _('ambiguous identifier'))

1226

id, self.indexfile, _('ambiguous identifier'))

1224

if maybewdir:

1227

if maybewdir:

1225

raise error.WdirUnsupported

1228

raise error.WdirUnsupported

1226

return None

1229

return None

1227

except TypeError:

1230

except TypeError:

1228

pass

1231

pass

1229

1232

1230

def lookup(self, id):

1233

def lookup(self, id):

1231

"""locate a node based on:

1234

"""locate a node based on:

1232

- revision number or str(revision number)

1235

- revision number or str(revision number)

1233

- nodeid or subset of hex nodeid

1236

- nodeid or subset of hex nodeid

1234

"""

1237

"""

1235

n = self._match(id)

1238

n = self._match(id)

1236

if n is not None:

1239

if n is not None:

1237

return n

1240

return n

1238

n = self._partialmatch(id)

1241

n = self._partialmatch(id)

1239

if n:

1242

if n:

1240

return n

1243

return n

1241

1244

1242

raise error.LookupError(id, self.indexfile, _('no match found'))

1245

raise error.LookupError(id, self.indexfile, _('no match found'))

1243

1246

1244

def shortest(self, node, minlength=1):

1247

def shortest(self, node, minlength=1):

1245

"""Find the shortest unambiguous prefix that matches node."""

1248

"""Find the shortest unambiguous prefix that matches node."""

1246

def isvalid(prefix):

1249

def isvalid(prefix):

1247

try:

1250

try:

1248

node = self._partialmatch(prefix)

1251

node = self._partialmatch(prefix)

1249

except error.AmbiguousPrefixLookupError:

1252

except error.AmbiguousPrefixLookupError:

1250

return False

1253

return False

1251

except error.WdirUnsupported:

1254

except error.WdirUnsupported:

1252

# single 'ff...' match

1255

# single 'ff...' match

1253

return True

1256

return True

1254

if node is None:

1257

if node is None:

1255

raise error.LookupError(node, self.indexfile, _('no node'))

1258

raise error.LookupError(node, self.indexfile, _('no node'))

1256

return True

1259

return True

1257

1260

1258

def maybewdir(prefix):

1261

def maybewdir(prefix):

1259

return all(c == 'f' for c in prefix)

1262

return all(c == 'f' for c in prefix)

1260

1263

1261

hexnode = hex(node)

1264

hexnode = hex(node)

1262

1265

1263

def disambiguate(hexnode, minlength):

1266

def disambiguate(hexnode, minlength):

1264

"""Disambiguate against wdirid."""

1267

"""Disambiguate against wdirid."""

1265

for length in range(minlength, 41):

1268

for length in range(minlength, 41):

1266

prefix = hexnode[:length]

1269

prefix = hexnode[:length]

1267

if not maybewdir(prefix):

1270

if not maybewdir(prefix):

1268

return prefix

1271

return prefix

1269

1272

1270

if not getattr(self, 'filteredrevs', None):

1273

if not getattr(self, 'filteredrevs', None):

1271

try:

1274

try:

1272

length = max(self.index.shortest(node), minlength)

1275

length = max(self.index.shortest(node), minlength)

1273

return disambiguate(hexnode, length)

1276

return disambiguate(hexnode, length)

1274

except error.RevlogError:

1277

except error.RevlogError:

1275

if node != wdirid:

1278

if node != wdirid:

1276

raise error.LookupError(node, self.indexfile, _('no node'))

1279

raise error.LookupError(node, self.indexfile, _('no node'))

1277

except AttributeError:

1280

except AttributeError:

1278

# Fall through to pure code

1281

# Fall through to pure code

1279

pass

1282

pass

1280

1283

1281

if node == wdirid:

1284

if node == wdirid:

1282

for length in range(minlength, 41):

1285

for length in range(minlength, 41):

1283

prefix = hexnode[:length]

1286

prefix = hexnode[:length]

1284

if isvalid(prefix):

1287

if isvalid(prefix):

1285

return prefix

1288

return prefix

1286

1289

1287

for length in range(minlength, 41):

1290

for length in range(minlength, 41):

1288

prefix = hexnode[:length]

1291

prefix = hexnode[:length]

1289

if isvalid(prefix):

1292

if isvalid(prefix):

1290

return disambiguate(hexnode, length)

1293

return disambiguate(hexnode, length)

1291

1294

1292

def cmp(self, node, text):

1295

def cmp(self, node, text):

1293

"""compare text with a given file revision

1296

"""compare text with a given file revision

1294

1297

1295

returns True if text is different than what is stored.

1298

returns True if text is different than what is stored.

1296

"""

1299

"""

1297

p1, p2 = self.parents(node)

1300

p1, p2 = self.parents(node)

1298

return storageutil.hashrevisionsha1(text, p1, p2) != node

1301

return storageutil.hashrevisionsha1(text, p1, p2) != node

1299

1302

1300

def _cachesegment(self, offset, data):

1303

def _cachesegment(self, offset, data):

1301

"""Add a segment to the revlog cache.

1304

"""Add a segment to the revlog cache.

1302

1305

1303

Accepts an absolute offset and the data that is at that location.

1306

Accepts an absolute offset and the data that is at that location.

1304

"""

1307

"""

1305

o, d = self._chunkcache

1308

o, d = self._chunkcache

1306

# try to add to existing cache

1309

# try to add to existing cache

1307

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1310

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1308

self._chunkcache = o, d + data

1311

self._chunkcache = o, d + data

1309

else:

1312

else:

1310

self._chunkcache = offset, data

1313

self._chunkcache = offset, data

1311

1314

1312

def _readsegment(self, offset, length, df=None):

1315

def _readsegment(self, offset, length, df=None):

1313

"""Load a segment of raw data from the revlog.

1316

"""Load a segment of raw data from the revlog.

1314

1317

1315

Accepts an absolute offset, length to read, and an optional existing

1318

Accepts an absolute offset, length to read, and an optional existing

1316

file handle to read from.

1319

file handle to read from.

1317

1320

1318

If an existing file handle is passed, it will be seeked and the

1321

If an existing file handle is passed, it will be seeked and the

1319

original seek position will NOT be restored.

1322

original seek position will NOT be restored.

1320

1323

1321

Returns a str or buffer of raw byte data.

1324

Returns a str or buffer of raw byte data.

1322

"""

1325

"""

1323

# Cache data both forward and backward around the requested

1326

# Cache data both forward and backward around the requested

1324

# data, in a fixed size window. This helps speed up operations

1327

# data, in a fixed size window. This helps speed up operations

1325

# involving reading the revlog backwards.

1328

# involving reading the revlog backwards.

1326

cachesize = self._chunkcachesize

1329

cachesize = self._chunkcachesize

1327

realoffset = offset & ~(cachesize - 1)

1330

realoffset = offset & ~(cachesize - 1)

1328

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1331

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1329

- realoffset)

1332

- realoffset)

1330

with self._datareadfp(df) as df:

1333

with self._datareadfp(df) as df:

1331

df.seek(realoffset)

1334

df.seek(realoffset)

1332

d = df.read(reallength)

1335

d = df.read(reallength)

1333

self._cachesegment(realoffset, d)

1336

self._cachesegment(realoffset, d)

1334

if offset != realoffset or reallength != length:

1337

if offset != realoffset or reallength != length:

1335

return util.buffer(d, offset - realoffset, length)

1338

return util.buffer(d, offset - realoffset, length)

1336

return d

1339

return d

1337

1340

1338

def _getsegment(self, offset, length, df=None):

1341

def _getsegment(self, offset, length, df=None):

1339

"""Obtain a segment of raw data from the revlog.

1342

"""Obtain a segment of raw data from the revlog.

1340

1343

1341

Accepts an absolute offset, length of bytes to obtain, and an

1344

Accepts an absolute offset, length of bytes to obtain, and an

1342

optional file handle to the already-opened revlog. If the file

1345

optional file handle to the already-opened revlog. If the file

1343

handle is used, it's original seek position will not be preserved.

1346

handle is used, it's original seek position will not be preserved.

1344

1347

1345

Requests for data may be returned from a cache.

1348

Requests for data may be returned from a cache.

1346

1349

1347

Returns a str or a buffer instance of raw byte data.

1350

Returns a str or a buffer instance of raw byte data.

1348

"""

1351

"""

1349

o, d = self._chunkcache

1352

o, d = self._chunkcache

1350

l = len(d)

1353

l = len(d)

1351

1354

1352

# is it in the cache?

1355

# is it in the cache?

1353

cachestart = offset - o

1356

cachestart = offset - o

1354

cacheend = cachestart + length

1357

cacheend = cachestart + length

1355

if cachestart >= 0 and cacheend <= l:

1358

if cachestart >= 0 and cacheend <= l:

1356

if cachestart == 0 and cacheend == l:

1359

if cachestart == 0 and cacheend == l:

1357

return d # avoid a copy

1360

return d # avoid a copy

1358

return util.buffer(d, cachestart, cacheend - cachestart)

1361

return util.buffer(d, cachestart, cacheend - cachestart)

1359

1362

1360

return self._readsegment(offset, length, df=df)

1363

return self._readsegment(offset, length, df=df)

1361

1364

1362

def _getsegmentforrevs(self, startrev, endrev, df=None):

1365

def _getsegmentforrevs(self, startrev, endrev, df=None):

1363

"""Obtain a segment of raw data corresponding to a range of revisions.

1366

"""Obtain a segment of raw data corresponding to a range of revisions.

1364

1367

1365

Accepts the start and end revisions and an optional already-open

1368

Accepts the start and end revisions and an optional already-open

1366

file handle to be used for reading. If the file handle is read, its

1369

file handle to be used for reading. If the file handle is read, its

1367

seek position will not be preserved.

1370

seek position will not be preserved.

1368

1371

1369

Requests for data may be satisfied by a cache.

1372

Requests for data may be satisfied by a cache.

1370

1373

1371

Returns a 2-tuple of (offset, data) for the requested range of

1374

Returns a 2-tuple of (offset, data) for the requested range of

1372

revisions. Offset is the integer offset from the beginning of the

1375

revisions. Offset is the integer offset from the beginning of the

1373

revlog and data is a str or buffer of the raw byte data.

1376

revlog and data is a str or buffer of the raw byte data.

1374

1377

1375

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1378

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1376

to determine where each revision's data begins and ends.

1379

to determine where each revision's data begins and ends.

1377

"""

1380

"""

1378

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1381

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1379

# (functions are expensive).

1382

# (functions are expensive).

1380

index = self.index

1383

index = self.index

1381

istart = index[startrev]

1384

istart = index[startrev]

1382

start = int(istart[0] >> 16)

1385

start = int(istart[0] >> 16)

1383

if startrev == endrev:

1386

if startrev == endrev:

1384

end = start + istart[1]

1387

end = start + istart[1]

1385

else:

1388

else:

1386

iend = index[endrev]

1389

iend = index[endrev]

1387

end = int(iend[0] >> 16) + iend[1]

1390

end = int(iend[0] >> 16) + iend[1]

1388

1391

1389

if self._inline:

1392

if self._inline:

1390

start += (startrev + 1) * self._io.size

1393

start += (startrev + 1) * self._io.size

1391

end += (endrev + 1) * self._io.size

1394

end += (endrev + 1) * self._io.size

1392

length = end - start

1395

length = end - start

1393

1396

1394

return start, self._getsegment(start, length, df=df)

1397

return start, self._getsegment(start, length, df=df)

1395

1398

1396

def _chunk(self, rev, df=None):

1399

def _chunk(self, rev, df=None):

1397

"""Obtain a single decompressed chunk for a revision.

1400

"""Obtain a single decompressed chunk for a revision.

1398

1401

1399

Accepts an integer revision and an optional already-open file handle

1402

Accepts an integer revision and an optional already-open file handle

1400

to be used for reading. If used, the seek position of the file will not

1403

to be used for reading. If used, the seek position of the file will not

1401

be preserved.

1404

be preserved.

1402

1405

1403

Returns a str holding uncompressed data for the requested revision.

1406

Returns a str holding uncompressed data for the requested revision.

1404

"""

1407

"""

1405

return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

1408

return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

1406

1409

1407

def _chunks(self, revs, df=None, targetsize=None):

1410

def _chunks(self, revs, df=None, targetsize=None):

1408

"""Obtain decompressed chunks for the specified revisions.

1411

"""Obtain decompressed chunks for the specified revisions.

1409

1412

1410

Accepts an iterable of numeric revisions that are assumed to be in

1413

Accepts an iterable of numeric revisions that are assumed to be in

1411

ascending order. Also accepts an optional already-open file handle

1414

ascending order. Also accepts an optional already-open file handle

1412

to be used for reading. If used, the seek position of the file will

1415

to be used for reading. If used, the seek position of the file will

1413

not be preserved.

1416

not be preserved.

1414

1417

1415

This function is similar to calling ``self._chunk()`` multiple times,

1418

This function is similar to calling ``self._chunk()`` multiple times,

1416

but is faster.

1419

but is faster.

1417

1420

1418

Returns a list with decompressed data for each requested revision.

1421

Returns a list with decompressed data for each requested revision.

1419

"""

1422

"""

1420

if not revs:

1423

if not revs:

1421

return []

1424

return []

1422

start = self.start

1425

start = self.start

1423

length = self.length

1426

length = self.length

1424

inline = self._inline

1427

inline = self._inline

1425

iosize = self._io.size

1428

iosize = self._io.size

1426

buffer = util.buffer

1429

buffer = util.buffer

1427

1430

1428

l = []

1431

l = []

1429

ladd = l.append

1432

ladd = l.append

1430

1433

1431

if not self._withsparseread:

1434

if not self._withsparseread:

1432

slicedchunks = (revs,)

1435

slicedchunks = (revs,)

1433

else:

1436

else:

1434

slicedchunks = deltautil.slicechunk(self, revs,

1437

slicedchunks = deltautil.slicechunk(self, revs,

1435

targetsize=targetsize)

1438

targetsize=targetsize)

1436

1439

1437

for revschunk in slicedchunks:

1440

for revschunk in slicedchunks:

1438

firstrev = revschunk[0]

1441

firstrev = revschunk[0]

1439

# Skip trailing revisions with empty diff

1442

# Skip trailing revisions with empty diff

1440

for lastrev in revschunk[::-1]:

1443

for lastrev in revschunk[::-1]:

1441

if length(lastrev) != 0:

1444

if length(lastrev) != 0:

1442

break

1445

break

1443

1446

1444

try:

1447

try:

1445

offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)

1448

offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)

1446

except OverflowError:

1449

except OverflowError:

1447

# issue4215 - we can't cache a run of chunks greater than

1450

# issue4215 - we can't cache a run of chunks greater than

1448

# 2G on Windows

1451

# 2G on Windows

1449

return [self._chunk(rev, df=df) for rev in revschunk]

1452

return [self._chunk(rev, df=df) for rev in revschunk]

1450

1453

1451

decomp = self.decompress

1454

decomp = self.decompress

1452

for rev in revschunk:

1455

for rev in revschunk:

1453

chunkstart = start(rev)

1456

chunkstart = start(rev)

1454

if inline:

1457

if inline:

1455

chunkstart += (rev + 1) * iosize

1458

chunkstart += (rev + 1) * iosize

1456

chunklength = length(rev)

1459

chunklength = length(rev)

1457

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1460

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1458

1461

1459

return l

1462

return l

1460

1463

1461

def _chunkclear(self):

1464

def _chunkclear(self):

1462

"""Clear the raw chunk cache."""

1465

"""Clear the raw chunk cache."""

1463

self._chunkcache = (0, '')

1466

self._chunkcache = (0, '')

1464

1467

1465

def deltaparent(self, rev):

1468

def deltaparent(self, rev):

1466

"""return deltaparent of the given revision"""

1469

"""return deltaparent of the given revision"""

1467

base = self.index[rev][3]

1470

base = self.index[rev][3]

1468

if base == rev:

1471

if base == rev:

1469

return nullrev

1472

return nullrev

1470

elif self._generaldelta:

1473

elif self._generaldelta:

1471

return base

1474

return base

1472

else:

1475

else:

1473

return rev - 1

1476

return rev - 1

1474

1477

1475

def issnapshot(self, rev):

1478

def issnapshot(self, rev):

1476

"""tells whether rev is a snapshot

1479

"""tells whether rev is a snapshot

1477

"""

1480

"""

1478

if rev == nullrev:

1481

if rev == nullrev:

1479

return True

1482

return True

1480

deltap = self.deltaparent(rev)

1483

deltap = self.deltaparent(rev)

1481

if deltap == nullrev:

1484

if deltap == nullrev:

1482

return True

1485

return True

1483

p1, p2 = self.parentrevs(rev)

1486

p1, p2 = self.parentrevs(rev)

1484

if deltap in (p1, p2):

1487

if deltap in (p1, p2):

1485

return False

1488

return False

1486

return self.issnapshot(deltap)

1489

return self.issnapshot(deltap)

1487

1490

1488

def snapshotdepth(self, rev):

1491

def snapshotdepth(self, rev):

1489

"""number of snapshot in the chain before this one"""

1492

"""number of snapshot in the chain before this one"""

1490

if not self.issnapshot(rev):

1493

if not self.issnapshot(rev):

1491

raise error.ProgrammingError('revision %d not a snapshot')

1494

raise error.ProgrammingError('revision %d not a snapshot')

1492

return len(self._deltachain(rev)[0]) - 1

1495

return len(self._deltachain(rev)[0]) - 1

1493

1496

1494

def revdiff(self, rev1, rev2):

1497

def revdiff(self, rev1, rev2):

1495

"""return or calculate a delta between two revisions

1498

"""return or calculate a delta between two revisions

1496

1499

1497

The delta calculated is in binary form and is intended to be written to

1500

The delta calculated is in binary form and is intended to be written to

1498

revlog data directly. So this function needs raw revision data.

1501

revlog data directly. So this function needs raw revision data.

1499

"""

1502

"""

1500

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1503

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1501

return bytes(self._chunk(rev2))

1504

return bytes(self._chunk(rev2))

1502

1505

1503

return mdiff.textdiff(self.revision(rev1, raw=True),

1506

return mdiff.textdiff(self.revision(rev1, raw=True),

1504

self.revision(rev2, raw=True))

1507

self.revision(rev2, raw=True))

1505

1508

1506

def revision(self, nodeorrev, _df=None, raw=False):

1509

def revision(self, nodeorrev, _df=None, raw=False):

1507

"""return an uncompressed revision of a given node or revision

1510

"""return an uncompressed revision of a given node or revision

1508

number.

1511

number.

1509

1512

1510

_df - an existing file handle to read from. (internal-only)

1513

_df - an existing file handle to read from. (internal-only)

1511

raw - an optional argument specifying if the revision data is to be

1514

raw - an optional argument specifying if the revision data is to be

1512

treated as raw data when applying flag transforms. 'raw' should be set

1515

treated as raw data when applying flag transforms. 'raw' should be set

1513

to True when generating changegroups or in debug commands.

1516

to True when generating changegroups or in debug commands.

1514

"""

1517

"""

1515

if isinstance(nodeorrev, int):

1518

if isinstance(nodeorrev, int):

1516

rev = nodeorrev

1519

rev = nodeorrev

1517

node = self.node(rev)

1520

node = self.node(rev)

1518

else:

1521

else:

1519

node = nodeorrev

1522

node = nodeorrev

1520

rev = None

1523

rev = None

1521

1524

1522

cachedrev = None

1525

cachedrev = None

1523

flags = None

1526

flags = None

1524

rawtext = None

1527

rawtext = None

1525

if node == nullid:

1528

if node == nullid:

1526

return ""

1529

return ""

1527

if self._revisioncache:

1530

if self._revisioncache:

1528

if self._revisioncache[0] == node:

1531

if self._revisioncache[0] == node:

1529

# _cache only stores rawtext

1532

# _cache only stores rawtext

1530

if raw:

1533

if raw:

1531

return self._revisioncache[2]

1534

return self._revisioncache[2]

1532

# duplicated, but good for perf

1535

# duplicated, but good for perf

1533

if rev is None:

1536

if rev is None:

1534

rev = self.rev(node)

1537

rev = self.rev(node)

1535

if flags is None:

1538

if flags is None:

1536

flags = self.flags(rev)

1539

flags = self.flags(rev)

1537

# no extra flags set, no flag processor runs, text = rawtext

1540

# no extra flags set, no flag processor runs, text = rawtext

1538

if flags == REVIDX_DEFAULT_FLAGS:

1541

if flags == REVIDX_DEFAULT_FLAGS:

1539

return self._revisioncache[2]

1542

return self._revisioncache[2]

1540

# rawtext is reusable. need to run flag processor

1543

# rawtext is reusable. need to run flag processor

1541

rawtext = self._revisioncache[2]

1544

rawtext = self._revisioncache[2]

1542

1545

1543

cachedrev = self._revisioncache[1]

1546

cachedrev = self._revisioncache[1]

1544

1547

1545

# look up what we need to read

1548

# look up what we need to read

1546

if rawtext is None:

1549

if rawtext is None:

1547

if rev is None:

1550

if rev is None:

1548

rev = self.rev(node)

1551

rev = self.rev(node)

1549

1552

1550

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1553

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1551

if stopped:

1554

if stopped:

1552

rawtext = self._revisioncache[2]

1555

rawtext = self._revisioncache[2]

1553

1556

1554

# drop cache to save memory

1557

# drop cache to save memory

1555

self._revisioncache = None

1558

self._revisioncache = None

1556

1559

1557

targetsize = None

1560

targetsize = None

1558

rawsize = self.index[rev][2]

1561

rawsize = self.index[rev][2]

1559

if 0 <= rawsize:

1562

if 0 <= rawsize:

1560

targetsize = 4 * rawsize

1563

targetsize = 4 * rawsize

1561

1564

1562

bins = self._chunks(chain, df=_df, targetsize=targetsize)

1565

bins = self._chunks(chain, df=_df, targetsize=targetsize)

1563

if rawtext is None:

1566

if rawtext is None:

1564

rawtext = bytes(bins[0])

1567

rawtext = bytes(bins[0])

1565

bins = bins[1:]

1568

bins = bins[1:]

1566

1569

1567

rawtext = mdiff.patches(rawtext, bins)

1570

rawtext = mdiff.patches(rawtext, bins)

1568

self._revisioncache = (node, rev, rawtext)

1571

self._revisioncache = (node, rev, rawtext)

1569

1572

1570

if flags is None:

1573

if flags is None:

1571

if rev is None:

1574

if rev is None:

1572

rev = self.rev(node)

1575

rev = self.rev(node)

1573

flags = self.flags(rev)

1576

flags = self.flags(rev)

1574

1577

1575

text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)

1578

text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)

1576

if validatehash:

1579

if validatehash:

1577

self.checkhash(text, node, rev=rev)

1580

self.checkhash(text, node, rev=rev)

1578

1581

1579

return text

1582

return text

1580

1583

1581

def hash(self, text, p1, p2):

1584

def hash(self, text, p1, p2):

1582

"""Compute a node hash.

1585

"""Compute a node hash.

1583

1586

1584

Available as a function so that subclasses can replace the hash

1587

Available as a function so that subclasses can replace the hash

1585

as needed.

1588

as needed.

1586

"""

1589

"""

1587

return storageutil.hashrevisionsha1(text, p1, p2)

1590

return storageutil.hashrevisionsha1(text, p1, p2)

1588

1591

1589

def _processflags(self, text, flags, operation, raw=False):

1592

def _processflags(self, text, flags, operation, raw=False):

1590

"""Inspect revision data flags and applies transforms defined by

1593

"""Inspect revision data flags and applies transforms defined by

1591

registered flag processors.

1594

registered flag processors.

1592

1595

1593

``text`` - the revision data to process

1596

``text`` - the revision data to process

1594

``flags`` - the revision flags

1597

``flags`` - the revision flags

1595

``operation`` - the operation being performed (read or write)

1598

``operation`` - the operation being performed (read or write)

1596

``raw`` - an optional argument describing if the raw transform should be

1599

``raw`` - an optional argument describing if the raw transform should be

1597

applied.

1600

applied.

1598

1601

1599

This method processes the flags in the order (or reverse order if

1602

This method processes the flags in the order (or reverse order if

1600

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1603

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1601

flag processors registered for present flags. The order of flags defined

1604

flag processors registered for present flags. The order of flags defined

1602

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1605

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1603

1606

1604

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1607

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1605

processed text and ``validatehash`` is a bool indicating whether the

1608

processed text and ``validatehash`` is a bool indicating whether the

1606

returned text should be checked for hash integrity.

1609

returned text should be checked for hash integrity.

1607

1610

1608

Note: If the ``raw`` argument is set, it has precedence over the

1611

Note: If the ``raw`` argument is set, it has precedence over the

1609

operation and will only update the value of ``validatehash``.

1612

operation and will only update the value of ``validatehash``.

1610

"""

1613

"""

1611

# fast path: no flag processors will run

1614

# fast path: no flag processors will run

1612

if flags == 0:

1615

if flags == 0:

1613

return text, True

1616

return text, True

1614

if not operation in ('read', 'write'):

1617

if not operation in ('read', 'write'):

1615

raise error.ProgrammingError(_("invalid '%s' operation") %

1618

raise error.ProgrammingError(_("invalid '%s' operation") %

1616

operation)

1619

operation)

1617

# Check all flags are known.

1620

# Check all flags are known.

1618

if flags & ~REVIDX_KNOWN_FLAGS:

1621

if flags & ~REVIDX_KNOWN_FLAGS:

1619

raise error.RevlogError(_("incompatible revision flag '%#x'") %

1622

raise error.RevlogError(_("incompatible revision flag '%#x'") %

1620

(flags & ~REVIDX_KNOWN_FLAGS))

1623

(flags & ~REVIDX_KNOWN_FLAGS))

1621

validatehash = True

1624

validatehash = True

1622

# Depending on the operation (read or write), the order might be

1625

# Depending on the operation (read or write), the order might be

1623

# reversed due to non-commutative transforms.

1626

# reversed due to non-commutative transforms.

1624

orderedflags = REVIDX_FLAGS_ORDER

1627

orderedflags = REVIDX_FLAGS_ORDER

1625

if operation == 'write':

1628

if operation == 'write':

1626

orderedflags = reversed(orderedflags)

1629

orderedflags = reversed(orderedflags)

1627

1630

1628

for flag in orderedflags:

1631

for flag in orderedflags:

1629

# If a flagprocessor has been registered for a known flag, apply the

1632

# If a flagprocessor has been registered for a known flag, apply the

1630

# related operation transform and update result tuple.

1633

# related operation transform and update result tuple.

1631

if flag & flags:

1634

if flag & flags:

1632

vhash = True

1635

vhash = True

1633

1636

1634

if flag not in self._flagprocessors:

1637

if flag not in self._flagprocessors:

1635

message = _("missing processor for flag '%#x'") % (flag)

1638

message = _("missing processor for flag '%#x'") % (flag)

1636

raise error.RevlogError(message)

1639

raise error.RevlogError(message)

1637

1640

1638

processor = self._flagprocessors[flag]

1641

processor = self._flagprocessors[flag]

1639

if processor is not None:

1642

if processor is not None:

1640

readtransform, writetransform, rawtransform = processor

1643

readtransform, writetransform, rawtransform = processor

1641

1644

1642

if raw:

1645

if raw:

1643

vhash = rawtransform(self, text)

1646

vhash = rawtransform(self, text)

1644

elif operation == 'read':

1647

elif operation == 'read':

1645

text, vhash = readtransform(self, text)

1648

text, vhash = readtransform(self, text)

1646

else: # write operation

1649

else: # write operation

1647

text, vhash = writetransform(self, text)

1650

text, vhash = writetransform(self, text)

1648

validatehash = validatehash and vhash

1651

validatehash = validatehash and vhash

1649

1652

1650

return text, validatehash

1653

return text, validatehash

1651

1654

1652

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1655

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1653

"""Check node hash integrity.

1656

"""Check node hash integrity.

1654

1657

1655

Available as a function so that subclasses can extend hash mismatch

1658

Available as a function so that subclasses can extend hash mismatch

1656

behaviors as needed.

1659

behaviors as needed.

1657

"""

1660

"""

1658

try:

1661

try:

1659

if p1 is None and p2 is None:

1662

if p1 is None and p2 is None:

1660

p1, p2 = self.parents(node)

1663

p1, p2 = self.parents(node)

1661

if node != self.hash(text, p1, p2):

1664

if node != self.hash(text, p1, p2):

1662

# Clear the revision cache on hash failure. The revision cache

1665

# Clear the revision cache on hash failure. The revision cache

1663

# only stores the raw revision and clearing the cache does have

1666

# only stores the raw revision and clearing the cache does have

1664

# the side-effect that we won't have a cache hit when the raw

1667

# the side-effect that we won't have a cache hit when the raw

1665

# revision data is accessed. But this case should be rare and

1668

# revision data is accessed. But this case should be rare and

1666

# it is extra work to teach the cache about the hash

1669

# it is extra work to teach the cache about the hash

1667

# verification state.

1670

# verification state.

1668

if self._revisioncache and self._revisioncache[0] == node:

1671

if self._revisioncache and self._revisioncache[0] == node:

1669

self._revisioncache = None

1672

self._revisioncache = None

1670

1673

1671

revornode = rev

1674

revornode = rev

1672

if revornode is None:

1675

if revornode is None:

1673

revornode = templatefilters.short(hex(node))

1676

revornode = templatefilters.short(hex(node))

1674

raise error.RevlogError(_("integrity check failed on %s:%s")

1677

raise error.RevlogError(_("integrity check failed on %s:%s")

1675

% (self.indexfile, pycompat.bytestr(revornode)))

1678

% (self.indexfile, pycompat.bytestr(revornode)))

1676

except error.RevlogError:

1679

except error.RevlogError:

1677

if self._censorable and storageutil.iscensoredtext(text):

1680

if self._censorable and storageutil.iscensoredtext(text):

1678

raise error.CensoredNodeError(self.indexfile, node, text)

1681

raise error.CensoredNodeError(self.indexfile, node, text)

1679

raise

1682

raise

1680

1683

1681

def _enforceinlinesize(self, tr, fp=None):

1684

def _enforceinlinesize(self, tr, fp=None):

1682

"""Check if the revlog is too big for inline and convert if so.

1685

"""Check if the revlog is too big for inline and convert if so.

1683

1686

1684

This should be called after revisions are added to the revlog. If the

1687

This should be called after revisions are added to the revlog. If the

1685

revlog has grown too large to be an inline revlog, it will convert it

1688

revlog has grown too large to be an inline revlog, it will convert it

1686

to use multiple index and data files.

1689

to use multiple index and data files.

1687

"""

1690

"""

1688

tiprev = len(self) - 1

1691

tiprev = len(self) - 1

1689

if (not self._inline or

1692

if (not self._inline or

1690

(self.start(tiprev) + self.length(tiprev)) < _maxinline):

1693

(self.start(tiprev) + self.length(tiprev)) < _maxinline):

1691

return

1694

return

1692

1695

1693

trinfo = tr.find(self.indexfile)

1696

trinfo = tr.find(self.indexfile)

1694

if trinfo is None:

1697

if trinfo is None:

1695

raise error.RevlogError(_("%s not found in the transaction")

1698

raise error.RevlogError(_("%s not found in the transaction")

1696

% self.indexfile)

1699

% self.indexfile)

1697

1700

1698

trindex = trinfo[2]

1701

trindex = trinfo[2]

1699

if trindex is not None:

1702

if trindex is not None:

1700

dataoff = self.start(trindex)

1703

dataoff = self.start(trindex)

1701

else:

1704

else:

1702

# revlog was stripped at start of transaction, use all leftover data

1705

# revlog was stripped at start of transaction, use all leftover data

1703

trindex = len(self) - 1

1706

trindex = len(self) - 1

1704

dataoff = self.end(tiprev)

1707

dataoff = self.end(tiprev)

1705

1708

1706

tr.add(self.datafile, dataoff)

1709

tr.add(self.datafile, dataoff)

1707

1710

1708

if fp:

1711

if fp:

1709

fp.flush()

1712

fp.flush()

1710

fp.close()

1713

fp.close()

1711

1714

1712

with self._datafp('w') as df:

1715

with self._datafp('w') as df:

1713

for r in self:

1716

for r in self:

1714

df.write(self._getsegmentforrevs(r, r)[1])

1717

df.write(self._getsegmentforrevs(r, r)[1])

1715

1718

1716

with self._indexfp('w') as fp:

1719

with self._indexfp('w') as fp:

1717

self.version &= ~FLAG_INLINE_DATA

1720

self.version &= ~FLAG_INLINE_DATA

1718

self._inline = False

1721

self._inline = False

1719

io = self._io

1722

io = self._io

1720

for i in self:

1723

for i in self:

1721

e = io.packentry(self.index[i], self.node, self.version, i)

1724

e = io.packentry(self.index[i], self.node, self.version, i)

1722

fp.write(e)

1725

fp.write(e)

1723

1726

1724

# the temp file replace the real index when we exit the context

1727

# the temp file replace the real index when we exit the context

1725

# manager

1728

# manager

1726

1729

1727

tr.replace(self.indexfile, trindex * self._io.size)

1730

tr.replace(self.indexfile, trindex * self._io.size)

1728

self._chunkclear()

1731

self._chunkclear()

1729

1732

1730

def _nodeduplicatecallback(self, transaction, node):

1733

def _nodeduplicatecallback(self, transaction, node):

1731

"""called when trying to add a node already stored.

1734

"""called when trying to add a node already stored.

1732

"""

1735

"""

1733

1736

1734

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1737

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1735

node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):

1738

node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):

1736

"""add a revision to the log

1739

"""add a revision to the log

1737

1740

1738

text - the revision data to add

1741

text - the revision data to add

1739

transaction - the transaction object used for rollback

1742

transaction - the transaction object used for rollback

1740

link - the linkrev data to add

1743

link - the linkrev data to add

1741

p1, p2 - the parent nodeids of the revision

1744

p1, p2 - the parent nodeids of the revision

1742

cachedelta - an optional precomputed delta

1745

cachedelta - an optional precomputed delta

1743

node - nodeid of revision; typically node is not specified, and it is

1746

node - nodeid of revision; typically node is not specified, and it is

1744

computed by default as hash(text, p1, p2), however subclasses might

1747

computed by default as hash(text, p1, p2), however subclasses might

1745

use different hashing method (and override checkhash() in such case)

1748

use different hashing method (and override checkhash() in such case)

1746

flags - the known flags to set on the revision

1749

flags - the known flags to set on the revision

1747

deltacomputer - an optional deltacomputer instance shared between

1750

deltacomputer - an optional deltacomputer instance shared between

1748

multiple calls

1751

multiple calls

1749

"""

1752

"""

1750

if link == nullrev:

1753

if link == nullrev:

1751

raise error.RevlogError(_("attempted to add linkrev -1 to %s")

1754

raise error.RevlogError(_("attempted to add linkrev -1 to %s")

1752

% self.indexfile)

1755

% self.indexfile)

1753

1756

1754

if flags:

1757

if flags:

1755

node = node or self.hash(text, p1, p2)

1758

node = node or self.hash(text, p1, p2)

1756

1759

1757

rawtext, validatehash = self._processflags(text, flags, 'write')

1760

rawtext, validatehash = self._processflags(text, flags, 'write')

1758

1761

1759

# If the flag processor modifies the revision data, ignore any provided

1762

# If the flag processor modifies the revision data, ignore any provided

1760

# cachedelta.

1763

# cachedelta.

1761

if rawtext != text:

1764

if rawtext != text:

1762

cachedelta = None

1765

cachedelta = None

1763

1766

1764

if len(rawtext) > _maxentrysize:

1767

if len(rawtext) > _maxentrysize:

1765

raise error.RevlogError(

1768

raise error.RevlogError(

1766

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1769

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1767

% (self.indexfile, len(rawtext)))

1770

% (self.indexfile, len(rawtext)))

1768

1771

1769

node = node or self.hash(rawtext, p1, p2)

1772

node = node or self.hash(rawtext, p1, p2)

1770

if node in self.nodemap:

1773

if node in self.nodemap:

1771

return node

1774

return node

1772

1775

1773

if validatehash:

1776

if validatehash:

1774

self.checkhash(rawtext, node, p1=p1, p2=p2)

1777

self.checkhash(rawtext, node, p1=p1, p2=p2)

1775

1778

1776

return self.addrawrevision(rawtext, transaction, link, p1, p2, node,

1779

return self.addrawrevision(rawtext, transaction, link, p1, p2, node,

1777

flags, cachedelta=cachedelta,

1780

flags, cachedelta=cachedelta,

1778

deltacomputer=deltacomputer)

1781

deltacomputer=deltacomputer)

1779

1782

1780

def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,

1783

def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,

1781

cachedelta=None, deltacomputer=None):

1784

cachedelta=None, deltacomputer=None):

1782

"""add a raw revision with known flags, node and parents

1785

"""add a raw revision with known flags, node and parents

1783

useful when reusing a revision not stored in this revlog (ex: received

1786

useful when reusing a revision not stored in this revlog (ex: received

1784

over wire, or read from an external bundle).

1787

over wire, or read from an external bundle).

1785

"""

1788

"""

1786

dfh = None

1789

dfh = None

1787

if not self._inline:

1790

if not self._inline:

1788

dfh = self._datafp("a+")

1791

dfh = self._datafp("a+")

1789

ifh = self._indexfp("a+")

1792

ifh = self._indexfp("a+")

1790

try:

1793

try:

1791

return self._addrevision(node, rawtext, transaction, link, p1, p2,

1794

return self._addrevision(node, rawtext, transaction, link, p1, p2,

1792

flags, cachedelta, ifh, dfh,

1795

flags, cachedelta, ifh, dfh,

1793

deltacomputer=deltacomputer)

1796

deltacomputer=deltacomputer)

1794

finally:

1797

finally:

1795

if dfh:

1798

if dfh:

1796

dfh.close()

1799

dfh.close()

1797

ifh.close()

1800

ifh.close()

1798

1801

1799

def compress(self, data):

1802

def compress(self, data):

1800

"""Generate a possibly-compressed representation of data."""

1803

"""Generate a possibly-compressed representation of data."""

1801

if not data:

1804

if not data:

1802

return '', data

1805

return '', data

1803

1806

1804

compressed = self._compressor.compress(data)

1807

compressed = self._compressor.compress(data)

1805

1808

1806

if compressed:

1809

if compressed:

1807

# The revlog compressor added the header in the returned data.

1810

# The revlog compressor added the header in the returned data.

1808

return '', compressed

1811

return '', compressed

1809

1812

1810

if data[0:1] == '\0':

1813

if data[0:1] == '\0':

1811

return '', data

1814

return '', data

1812

return 'u', data

1815

return 'u', data

1813

1816

1814

def decompress(self, data):

1817

def decompress(self, data):

1815

"""Decompress a revlog chunk.

1818

"""Decompress a revlog chunk.

1816

1819

1817

The chunk is expected to begin with a header identifying the

1820

The chunk is expected to begin with a header identifying the

1818

format type so it can be routed to an appropriate decompressor.

1821

format type so it can be routed to an appropriate decompressor.

1819

"""

1822

"""

1820

if not data:

1823

if not data:

1821

return data

1824

return data

1822

1825

1823

# Revlogs are read much more frequently than they are written and many

1826

# Revlogs are read much more frequently than they are written and many

1824

# chunks only take microseconds to decompress, so performance is

1827

# chunks only take microseconds to decompress, so performance is

1825

# important here.

1828

# important here.

1826

#

1829

#

1827

# We can make a few assumptions about revlogs:

1830

# We can make a few assumptions about revlogs:

1828

#

1831

#

1829

# 1) the majority of chunks will be compressed (as opposed to inline

1832

# 1) the majority of chunks will be compressed (as opposed to inline

1830

# raw data).

1833

# raw data).

1831

# 2) decompressing *any* data will likely by at least 10x slower than

1834

# 2) decompressing *any* data will likely by at least 10x slower than

1832

# returning raw inline data.

1835

# returning raw inline data.

1833

# 3) we want to prioritize common and officially supported compression

1836

# 3) we want to prioritize common and officially supported compression

1834

# engines

1837

# engines

1835

#

1838

#

1836

# It follows that we want to optimize for "decompress compressed data

1839

# It follows that we want to optimize for "decompress compressed data

1837

# when encoded with common and officially supported compression engines"

1840

# when encoded with common and officially supported compression engines"

1838

# case over "raw data" and "data encoded by less common or non-official

1841

# case over "raw data" and "data encoded by less common or non-official

1839

# compression engines." That is why we have the inline lookup first

1842

# compression engines." That is why we have the inline lookup first

1840

# followed by the compengines lookup.

1843

# followed by the compengines lookup.

1841

#

1844

#

1842

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1845

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1843

# compressed chunks. And this matters for changelog and manifest reads.

1846

# compressed chunks. And this matters for changelog and manifest reads.

1844

t = data[0:1]

1847

t = data[0:1]

1845

1848

1846

if t == 'x':

1849

if t == 'x':

1847

try:

1850

try:

1848

return _zlibdecompress(data)

1851

return _zlibdecompress(data)

1849

except zlib.error as e:

1852

except zlib.error as e:

1850

raise error.RevlogError(_('revlog decompress error: %s') %

1853

raise error.RevlogError(_('revlog decompress error: %s') %

1851

stringutil.forcebytestr(e))

1854

stringutil.forcebytestr(e))

1852

# '\0' is more common than 'u' so it goes first.

1855

# '\0' is more common than 'u' so it goes first.

1853

elif t == '\0':

1856

elif t == '\0':

1854

return data

1857

return data

1855

elif t == 'u':

1858

elif t == 'u':

1856

return util.buffer(data, 1)

1859

return util.buffer(data, 1)

1857

1860

1858

try:

1861

try:

1859

compressor = self._decompressors[t]

1862

compressor = self._decompressors[t]

1860

except KeyError:

1863

except KeyError:

1861

try:

1864

try:

1862

engine = util.compengines.forrevlogheader(t)

1865

engine = util.compengines.forrevlogheader(t)

1863

compressor = engine.revlogcompressor()

1866

compressor = engine.revlogcompressor()

1864

self._decompressors[t] = compressor

1867

self._decompressors[t] = compressor

1865

except KeyError:

1868

except KeyError:

1866

raise error.RevlogError(_('unknown compression type %r') % t)

1869

raise error.RevlogError(_('unknown compression type %r') % t)

1867

1870

1868

return compressor.decompress(data)

1871

return compressor.decompress(data)

1869

1872

1870

def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,

1873

def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,

1871

cachedelta, ifh, dfh, alwayscache=False,

1874

cachedelta, ifh, dfh, alwayscache=False,

1872

deltacomputer=None):

1875

deltacomputer=None):

1873

"""internal function to add revisions to the log

1876

"""internal function to add revisions to the log

1874

1877

1875

see addrevision for argument descriptions.

1878

see addrevision for argument descriptions.

1876

1879

1877

note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

1880

note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

1878

1881

1879

if "deltacomputer" is not provided or None, a defaultdeltacomputer will

1882

if "deltacomputer" is not provided or None, a defaultdeltacomputer will

1880

be used.

1883

be used.

1881

1884

1882

invariants:

1885

invariants:

1883

- rawtext is optional (can be None); if not set, cachedelta must be set.

1886

- rawtext is optional (can be None); if not set, cachedelta must be set.

1884

if both are set, they must correspond to each other.

1887

if both are set, they must correspond to each other.

1885

"""

1888

"""

1886

if node == nullid:

1889

if node == nullid:

1887

raise error.RevlogError(_("%s: attempt to add null revision") %

1890

raise error.RevlogError(_("%s: attempt to add null revision") %

1888

self.indexfile)

1891

self.indexfile)

1889

if node == wdirid or node in wdirfilenodeids:

1892

if node == wdirid or node in wdirfilenodeids:

1890

raise error.RevlogError(_("%s: attempt to add wdir revision") %

1893

raise error.RevlogError(_("%s: attempt to add wdir revision") %

1891

self.indexfile)

1894

self.indexfile)

1892

1895

1893

if self._inline:

1896

if self._inline:

1894

fh = ifh

1897

fh = ifh

1895

else:

1898

else:

1896

fh = dfh

1899

fh = dfh

1897

1900

1898

btext = [rawtext]

1901

btext = [rawtext]

1899

1902

1900

curr = len(self)

1903

curr = len(self)

1901

prev = curr - 1

1904

prev = curr - 1

1902

offset = self.end(prev)

1905

offset = self.end(prev)

1903

p1r, p2r = self.rev(p1), self.rev(p2)

1906

p1r, p2r = self.rev(p1), self.rev(p2)

1904

1907

1905

# full versions are inserted when the needed deltas

1908

# full versions are inserted when the needed deltas

1906

# become comparable to the uncompressed text

1909

# become comparable to the uncompressed text

1907

if rawtext is None:

1910

if rawtext is None:

1908

# need rawtext size, before changed by flag processors, which is

1911

# need rawtext size, before changed by flag processors, which is

1909

# the non-raw size. use revlog explicitly to avoid filelog's extra

1912

# the non-raw size. use revlog explicitly to avoid filelog's extra

1910

# logic that might remove metadata size.

1913

# logic that might remove metadata size.

1911

textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),

1914

textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),

1912

cachedelta[1])

1915

cachedelta[1])

1913

else:

1916

else:

1914

textlen = len(rawtext)

1917

textlen = len(rawtext)

1915

1918

1916

if deltacomputer is None:

1919

if deltacomputer is None:

1917

deltacomputer = deltautil.deltacomputer(self)

1920

deltacomputer = deltautil.deltacomputer(self)

1918

1921

1919

revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

1922

revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

1920

1923

1921

deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

1924

deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

1922

1925

1923

e = (offset_type(offset, flags), deltainfo.deltalen, textlen,

1926

e = (offset_type(offset, flags), deltainfo.deltalen, textlen,

1924

deltainfo.base, link, p1r, p2r, node)

1927

deltainfo.base, link, p1r, p2r, node)

1925

self.index.append(e)

1928

self.index.append(e)

1926

self.nodemap[node] = curr

1929

self.nodemap[node] = curr

1927

1930

1928

entry = self._io.packentry(e, self.node, self.version, curr)

1931

entry = self._io.packentry(e, self.node, self.version, curr)

1929

self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,

1932

self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,

1930

link, offset)

1933

link, offset)

1931

1934

1932

rawtext = btext[0]

1935

rawtext = btext[0]

1933

1936

1934

if alwayscache and rawtext is None:

1937

if alwayscache and rawtext is None:

1935

rawtext = deltacomputer.buildtext(revinfo, fh)

1938

rawtext = deltacomputer.buildtext(revinfo, fh)

1936

1939

1937

if type(rawtext) == bytes: # only accept immutable objects

1940

if type(rawtext) == bytes: # only accept immutable objects

1938

self._revisioncache = (node, curr, rawtext)

1941

self._revisioncache = (node, curr, rawtext)

1939

self._chainbasecache[curr] = deltainfo.chainbase

1942

self._chainbasecache[curr] = deltainfo.chainbase

1940

return node

1943

return node

1941

1944

1942

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1945

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1943

# Files opened in a+ mode have inconsistent behavior on various

1946

# Files opened in a+ mode have inconsistent behavior on various

1944

# platforms. Windows requires that a file positioning call be made

1947

# platforms. Windows requires that a file positioning call be made

1945

# when the file handle transitions between reads and writes. See

1948

# when the file handle transitions between reads and writes. See

1946

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1949

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1947

# platforms, Python or the platform itself can be buggy. Some versions

1950

# platforms, Python or the platform itself can be buggy. Some versions

1948

# of Solaris have been observed to not append at the end of the file

1951

# of Solaris have been observed to not append at the end of the file

1949

# if the file was seeked to before the end. See issue4943 for more.

1952

# if the file was seeked to before the end. See issue4943 for more.

1950

#

1953

#

1951

# We work around this issue by inserting a seek() before writing.

1954

# We work around this issue by inserting a seek() before writing.

1952

# Note: This is likely not necessary on Python 3.

1955

# Note: This is likely not necessary on Python 3.

1953

ifh.seek(0, os.SEEK_END)

1956

ifh.seek(0, os.SEEK_END)

1954

if dfh:

1957

if dfh:

1955

dfh.seek(0, os.SEEK_END)

1958

dfh.seek(0, os.SEEK_END)

1956

1959

1957

curr = len(self) - 1

1960

curr = len(self) - 1

1958

if not self._inline:

1961

if not self._inline:

1959

transaction.add(self.datafile, offset)

1962

transaction.add(self.datafile, offset)

1960

transaction.add(self.indexfile, curr * len(entry))

1963

transaction.add(self.indexfile, curr * len(entry))

1961

if data[0]:

1964

if data[0]:

1962

dfh.write(data[0])

1965

dfh.write(data[0])

1963

dfh.write(data[1])

1966

dfh.write(data[1])

1964

ifh.write(entry)

1967

ifh.write(entry)

1965

else:

1968

else:

1966

offset += curr * self._io.size

1969

offset += curr * self._io.size

1967

transaction.add(self.indexfile, offset, curr)

1970

transaction.add(self.indexfile, offset, curr)

1968

ifh.write(entry)

1971

ifh.write(entry)

1969

ifh.write(data[0])

1972

ifh.write(data[0])

1970

ifh.write(data[1])

1973

ifh.write(data[1])

1971

self._enforceinlinesize(transaction, ifh)

1974

self._enforceinlinesize(transaction, ifh)

1972

1975

1973

def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):

1976

def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):

1974

"""

1977

"""

1975

add a delta group

1978

add a delta group

1976

1979

1977

given a set of deltas, add them to the revision log. the

1980

given a set of deltas, add them to the revision log. the

1978

first delta is against its parent, which should be in our

1981

first delta is against its parent, which should be in our

1979

log, the rest are against the previous delta.

1982

log, the rest are against the previous delta.

1980

1983

1981

If ``addrevisioncb`` is defined, it will be called with arguments of

1984

If ``addrevisioncb`` is defined, it will be called with arguments of

1982

this revlog and the node that was added.

1985

this revlog and the node that was added.

1983

"""

1986

"""

1984

1987

1985

nodes = []

1988

nodes = []

1986

1989

1987

r = len(self)

1990

r = len(self)

1988

end = 0

1991

end = 0

1989

if r:

1992

if r:

1990

end = self.end(r - 1)

1993

end = self.end(r - 1)

1991

ifh = self._indexfp("a+")

1994

ifh = self._indexfp("a+")

1992

isize = r * self._io.size

1995

isize = r * self._io.size

1993

if self._inline:

1996

if self._inline:

1994

transaction.add(self.indexfile, end + isize, r)

1997

transaction.add(self.indexfile, end + isize, r)

1995

dfh = None

1998

dfh = None

1996

else:

1999

else:

1997

transaction.add(self.indexfile, isize, r)

2000

transaction.add(self.indexfile, isize, r)

1998

transaction.add(self.datafile, end)

2001

transaction.add(self.datafile, end)

1999

dfh = self._datafp("a+")

2002

dfh = self._datafp("a+")

2000

def flush():

2003

def flush():

2001

if dfh:

2004

if dfh:

2002

dfh.flush()

2005

dfh.flush()

2003

ifh.flush()

2006

ifh.flush()

2004

try:

2007

try:

2005

deltacomputer = deltautil.deltacomputer(self)

2008

deltacomputer = deltautil.deltacomputer(self)

2006

# loop through our set of deltas

2009

# loop through our set of deltas

2007

for data in deltas:

2010

for data in deltas:

2008

node, p1, p2, linknode, deltabase, delta, flags = data

2011

node, p1, p2, linknode, deltabase, delta, flags = data

2009

link = linkmapper(linknode)

2012

link = linkmapper(linknode)

2010

flags = flags or REVIDX_DEFAULT_FLAGS

2013

flags = flags or REVIDX_DEFAULT_FLAGS

2011

2014

2012

nodes.append(node)

2015

nodes.append(node)

2013

2016

2014

if node in self.nodemap:

2017

if node in self.nodemap:

2015

self._nodeduplicatecallback(transaction, node)

2018

self._nodeduplicatecallback(transaction, node)

2016

# this can happen if two branches make the same change

2019

# this can happen if two branches make the same change

2017

continue

2020

continue

2018

2021

2019

for p in (p1, p2):

2022

for p in (p1, p2):

2020

if p not in self.nodemap:

2023

if p not in self.nodemap:

2021

raise error.LookupError(p, self.indexfile,

2024

raise error.LookupError(p, self.indexfile,

2022

_('unknown parent'))

2025

_('unknown parent'))

2023

2026

2024

if deltabase not in self.nodemap:

2027

if deltabase not in self.nodemap:

2025

raise error.LookupError(deltabase, self.indexfile,

2028

raise error.LookupError(deltabase, self.indexfile,

2026

_('unknown delta base'))

2029

_('unknown delta base'))

2027

2030

2028

baserev = self.rev(deltabase)

2031

baserev = self.rev(deltabase)

2029

2032

2030

if baserev != nullrev and self.iscensored(baserev):

2033

if baserev != nullrev and self.iscensored(baserev):

2031

# if base is censored, delta must be full replacement in a

2034

# if base is censored, delta must be full replacement in a

2032

# single patch operation

2035

# single patch operation

2033

hlen = struct.calcsize(">lll")

2036

hlen = struct.calcsize(">lll")

2034

oldlen = self.rawsize(baserev)

2037

oldlen = self.rawsize(baserev)

2035

newlen = len(delta) - hlen

2038

newlen = len(delta) - hlen

2036

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

2039

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

2037

raise error.CensoredBaseError(self.indexfile,

2040

raise error.CensoredBaseError(self.indexfile,

2038

self.node(baserev))

2041

self.node(baserev))

2039

2042

2040

if not flags and self._peek_iscensored(baserev, delta, flush):

2043

if not flags and self._peek_iscensored(baserev, delta, flush):

2041

flags |= REVIDX_ISCENSORED

2044

flags |= REVIDX_ISCENSORED

2042

2045

2043

# We assume consumers of addrevisioncb will want to retrieve

2046

# We assume consumers of addrevisioncb will want to retrieve

2044

# the added revision, which will require a call to

2047

# the added revision, which will require a call to

2045

# revision(). revision() will fast path if there is a cache

2048

# revision(). revision() will fast path if there is a cache

2046

# hit. So, we tell _addrevision() to always cache in this case.

2049

# hit. So, we tell _addrevision() to always cache in this case.

2047

# We're only using addgroup() in the context of changegroup

2050

# We're only using addgroup() in the context of changegroup

2048

# generation so the revision data can always be handled as raw

2051

# generation so the revision data can always be handled as raw

2049

# by the flagprocessor.

2052

# by the flagprocessor.

2050

self._addrevision(node, None, transaction, link,

2053

self._addrevision(node, None, transaction, link,

2051

p1, p2, flags, (baserev, delta),

2054

p1, p2, flags, (baserev, delta),

2052

ifh, dfh,

2055

ifh, dfh,

2053

alwayscache=bool(addrevisioncb),

2056

alwayscache=bool(addrevisioncb),

2054

deltacomputer=deltacomputer)

2057

deltacomputer=deltacomputer)

2055

2058

2056

if addrevisioncb:

2059

if addrevisioncb:

2057

addrevisioncb(self, node)

2060

addrevisioncb(self, node)

2058

2061

2059

if not dfh and not self._inline:

2062

if not dfh and not self._inline:

2060

# addrevision switched from inline to conventional

2063

# addrevision switched from inline to conventional

2061

# reopen the index

2064

# reopen the index

2062

ifh.close()

2065

ifh.close()

2063

dfh = self._datafp("a+")

2066

dfh = self._datafp("a+")

2064

ifh = self._indexfp("a+")

2067

ifh = self._indexfp("a+")

2065

finally:

2068

finally:

2066

if dfh:

2069

if dfh:

2067

dfh.close()

2070

dfh.close()

2068

ifh.close()

2071

ifh.close()

2069

2072

2070

return nodes

2073

return nodes

2071

2074

2072

def iscensored(self, rev):

2075

def iscensored(self, rev):

2073

"""Check if a file revision is censored."""

2076

"""Check if a file revision is censored."""

2074

if not self._censorable:

2077

if not self._censorable:

2075

return False

2078

return False

2076

2079

2077

return self.flags(rev) & REVIDX_ISCENSORED

2080

return self.flags(rev) & REVIDX_ISCENSORED

2078

2081

2079

def _peek_iscensored(self, baserev, delta, flush):

2082

def _peek_iscensored(self, baserev, delta, flush):

2080

"""Quickly check if a delta produces a censored revision."""

2083

"""Quickly check if a delta produces a censored revision."""

2081

if not self._censorable:

2084

if not self._censorable:

2082

return False

2085

return False

2083

2086

2084

# Fragile heuristic: unless new file meta keys are added alphabetically

2087

# Fragile heuristic: unless new file meta keys are added alphabetically

2085

# preceding "censored", all censored revisions are prefixed by

2088

# preceding "censored", all censored revisions are prefixed by

2086

# "\1\ncensored:". A delta producing such a censored revision must be a

2089

# "\1\ncensored:". A delta producing such a censored revision must be a

2087

# full-replacement delta, so we inspect the first and only patch in the

2090

# full-replacement delta, so we inspect the first and only patch in the

2088

# delta for this prefix.

2091

# delta for this prefix.

2089

hlen = struct.calcsize(">lll")

2092

hlen = struct.calcsize(">lll")

2090

if len(delta) <= hlen:

2093

if len(delta) <= hlen:

2091

return False

2094

return False

2092

2095

2093

oldlen = self.rawsize(baserev)

2096

oldlen = self.rawsize(baserev)

2094

newlen = len(delta) - hlen

2097

newlen = len(delta) - hlen

2095

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

2098

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

2096

return False

2099

return False

2097

2100

2098

add = "\1\ncensored:"

2101

add = "\1\ncensored:"

2099

addlen = len(add)

2102

addlen = len(add)

2100

return newlen >= addlen and delta[hlen:hlen + addlen] == add

2103

return newlen >= addlen and delta[hlen:hlen + addlen] == add

2101

2104

2102

def getstrippoint(self, minlink):

2105

def getstrippoint(self, minlink):

2103

"""find the minimum rev that must be stripped to strip the linkrev

2106

"""find the minimum rev that must be stripped to strip the linkrev

2104

2107

2105

Returns a tuple containing the minimum rev and a set of all revs that

2108

Returns a tuple containing the minimum rev and a set of all revs that

2106

have linkrevs that will be broken by this strip.

2109

have linkrevs that will be broken by this strip.

2107

"""

2110

"""

2108

return storageutil.resolvestripinfo(minlink, len(self) - 1,

2111

return storageutil.resolvestripinfo(minlink, len(self) - 1,

2109

self.headrevs(),

2112

self.headrevs(),

2110

self.linkrev, self.parentrevs)

2113

self.linkrev, self.parentrevs)

2111

2114

2112

def strip(self, minlink, transaction):

2115

def strip(self, minlink, transaction):

2113

"""truncate the revlog on the first revision with a linkrev >= minlink

2116

"""truncate the revlog on the first revision with a linkrev >= minlink

2114

2117

2115

This function is called when we're stripping revision minlink and

2118

This function is called when we're stripping revision minlink and

2116

its descendants from the repository.

2119

its descendants from the repository.

2117

2120

2118

We have to remove all revisions with linkrev >= minlink, because

2121

We have to remove all revisions with linkrev >= minlink, because

2119

the equivalent changelog revisions will be renumbered after the

2122

the equivalent changelog revisions will be renumbered after the

2120

strip.

2123

strip.

2121

2124

2122

So we truncate the revlog on the first of these revisions, and

2125

So we truncate the revlog on the first of these revisions, and

2123

trust that the caller has saved the revisions that shouldn't be

2126

trust that the caller has saved the revisions that shouldn't be

2124

removed and that it'll re-add them after this truncation.

2127

removed and that it'll re-add them after this truncation.

2125

"""

2128

"""

2126

if len(self) == 0:

2129

if len(self) == 0:

2127

return

2130

return

2128

2131

2129

rev, _ = self.getstrippoint(minlink)

2132

rev, _ = self.getstrippoint(minlink)

2130

if rev == len(self):

2133

if rev == len(self):

2131

return

2134

return

2132

2135

2133

# first truncate the files on disk

2136

# first truncate the files on disk

2134

end = self.start(rev)

2137

end = self.start(rev)

2135

if not self._inline:

2138

if not self._inline:

2136

transaction.add(self.datafile, end)

2139

transaction.add(self.datafile, end)

2137

end = rev * self._io.size

2140

end = rev * self._io.size

2138

else:

2141

else:

2139

end += rev * self._io.size

2142

end += rev * self._io.size

2140

2143

2141

transaction.add(self.indexfile, end)

2144

transaction.add(self.indexfile, end)

2142

2145

2143

# then reset internal state in memory to forget those revisions

2146

# then reset internal state in memory to forget those revisions

2144

self._revisioncache = None

2147

self._revisioncache = None

2145

self._chaininfocache = {}

2148

self._chaininfocache = {}

2146

self._chunkclear()

2149

self._chunkclear()

2147

for x in pycompat.xrange(rev, len(self)):

2150

for x in pycompat.xrange(rev, len(self)):

2148

del self.nodemap[self.node(x)]

2151

del self.nodemap[self.node(x)]

2149

2152

2150

del self.index[rev:-1]

2153

del self.index[rev:-1]

2151

self._nodepos = None

2154

self._nodepos = None

2152

2155

2153

def checksize(self):

2156

def checksize(self):

2154

expected = 0

2157

expected = 0

2155

if len(self):

2158

if len(self):

2156

expected = max(0, self.end(len(self) - 1))

2159

expected = max(0, self.end(len(self) - 1))

2157

2160

2158

try:

2161

try:

2159

with self._datafp() as f:

2162

with self._datafp() as f:

2160

f.seek(0, 2)

2163

f.seek(0, 2)

2161

actual = f.tell()

2164

actual = f.tell()

2162

dd = actual - expected

2165

dd = actual - expected

2163

except IOError as inst:

2166

except IOError as inst:

2164

if inst.errno != errno.ENOENT:

2167

if inst.errno != errno.ENOENT:

2165

raise

2168

raise

2166

dd = 0

2169

dd = 0

2167

2170

2168

try:

2171

try:

2169

f = self.opener(self.indexfile)

2172

f = self.opener(self.indexfile)

2170

f.seek(0, 2)

2173

f.seek(0, 2)

2171

actual = f.tell()

2174

actual = f.tell()

2172

f.close()

2175

f.close()

2173

s = self._io.size

2176

s = self._io.size

2174

i = max(0, actual // s)

2177

i = max(0, actual // s)

2175

di = actual - (i * s)

2178

di = actual - (i * s)

2176

if self._inline:

2179

if self._inline:

2177

databytes = 0

2180

databytes = 0

2178

for r in self:

2181

for r in self:

2179

databytes += max(0, self.length(r))

2182

databytes += max(0, self.length(r))

2180

dd = 0

2183

dd = 0

2181

di = actual - len(self) * s - databytes

2184

di = actual - len(self) * s - databytes

2182

except IOError as inst:

2185

except IOError as inst:

2183

if inst.errno != errno.ENOENT:

2186

if inst.errno != errno.ENOENT:

2184

raise

2187

raise

2185

di = 0

2188

di = 0

2186

2189

2187

return (dd, di)

2190

return (dd, di)

2188

2191

2189

def files(self):

2192

def files(self):

2190

res = [self.indexfile]

2193

res = [self.indexfile]

2191

if not self._inline:

2194

if not self._inline:

2192

res.append(self.datafile)

2195

res.append(self.datafile)

2193

return res

2196

return res

2194

2197

2195

def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,

2198

def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,

2196

assumehaveparentrevisions=False, deltaprevious=False):

2199

assumehaveparentrevisions=False, deltaprevious=False):

2197

if nodesorder not in ('nodes', 'storage', None):

2200

if nodesorder not in ('nodes', 'storage', None):

2198

raise error.ProgrammingError('unhandled value for nodesorder: %s' %

2201

raise error.ProgrammingError('unhandled value for nodesorder: %s' %

2199

nodesorder)

2202

nodesorder)

2200

2203

2201

if nodesorder is None and not self._generaldelta:

2204

if nodesorder is None and not self._generaldelta:

2202

nodesorder = 'storage'

2205

nodesorder = 'storage'

2203

2206

2204

return storageutil.emitrevisions(

2207

return storageutil.emitrevisions(

2205

self, nodes, nodesorder, revlogrevisiondelta,

2208

self, nodes, nodesorder, revlogrevisiondelta,

2206

deltaparentfn=self.deltaparent,

2209

deltaparentfn=self.deltaparent,

2207

candeltafn=self.candelta,

2210

candeltafn=self.candelta,

2208

rawsizefn=self.rawsize,

2211

rawsizefn=self.rawsize,

2209

revdifffn=self.revdiff,

2212

revdifffn=self.revdiff,

2210

flagsfn=self.flags,

2213

flagsfn=self.flags,

2211

sendfulltext=not self._storedeltachains,

2214

sendfulltext=not self._storedeltachains,

2212

revisiondata=revisiondata,

2215

revisiondata=revisiondata,

2213

assumehaveparentrevisions=assumehaveparentrevisions,

2216

assumehaveparentrevisions=assumehaveparentrevisions,

2214

deltaprevious=deltaprevious)

2217

deltaprevious=deltaprevious)

2215

2218

2216

DELTAREUSEALWAYS = 'always'

2219

DELTAREUSEALWAYS = 'always'

2217

DELTAREUSESAMEREVS = 'samerevs'

2220

DELTAREUSESAMEREVS = 'samerevs'

2218

DELTAREUSENEVER = 'never'

2221

DELTAREUSENEVER = 'never'

2219

2222

2220

DELTAREUSEFULLADD = 'fulladd'

2223

DELTAREUSEFULLADD = 'fulladd'

2221

2224

2222

DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

2225

DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

2223

2226

2224

def clone(self, tr, destrevlog, addrevisioncb=None,

2227

def clone(self, tr, destrevlog, addrevisioncb=None,

2225

deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):

2228

deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):

2226

"""Copy this revlog to another, possibly with format changes.

2229

"""Copy this revlog to another, possibly with format changes.

2227

2230

2228

The destination revlog will contain the same revisions and nodes.

2231

The destination revlog will contain the same revisions and nodes.

2229

However, it may not be bit-for-bit identical due to e.g. delta encoding

2232

However, it may not be bit-for-bit identical due to e.g. delta encoding

2230

differences.

2233

differences.

2231

2234

2232

The ``deltareuse`` argument control how deltas from the existing revlog

2235

The ``deltareuse`` argument control how deltas from the existing revlog

2233

are preserved in the destination revlog. The argument can have the

2236

are preserved in the destination revlog. The argument can have the

2234

following values:

2237

following values:

2235

2238

2236

DELTAREUSEALWAYS

2239

DELTAREUSEALWAYS

2237

Deltas will always be reused (if possible), even if the destination

2240

Deltas will always be reused (if possible), even if the destination

2238

revlog would not select the same revisions for the delta. This is the

2241

revlog would not select the same revisions for the delta. This is the

2239

fastest mode of operation.

2242

fastest mode of operation.

2240

DELTAREUSESAMEREVS

2243

DELTAREUSESAMEREVS

2241

Deltas will be reused if the destination revlog would pick the same

2244

Deltas will be reused if the destination revlog would pick the same

2242

revisions for the delta. This mode strikes a balance between speed

2245

revisions for the delta. This mode strikes a balance between speed

2243

and optimization.

2246

and optimization.

2244

DELTAREUSENEVER

2247

DELTAREUSENEVER

2245

Deltas will never be reused. This is the slowest mode of execution.

2248

Deltas will never be reused. This is the slowest mode of execution.

2246

This mode can be used to recompute deltas (e.g. if the diff/delta

2249

This mode can be used to recompute deltas (e.g. if the diff/delta

2247

algorithm changes).

2250

algorithm changes).

2248

2251

2249

Delta computation can be slow, so the choice of delta reuse policy can

2252

Delta computation can be slow, so the choice of delta reuse policy can

2250

significantly affect run time.

2253

significantly affect run time.

2251

2254

2252

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2255

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2253

two extremes. Deltas will be reused if they are appropriate. But if the

2256

two extremes. Deltas will be reused if they are appropriate. But if the

2254

delta could choose a better revision, it will do so. This means if you

2257

delta could choose a better revision, it will do so. This means if you

2255

are converting a non-generaldelta revlog to a generaldelta revlog,

2258

are converting a non-generaldelta revlog to a generaldelta revlog,

2256

deltas will be recomputed if the delta's parent isn't a parent of the

2259

deltas will be recomputed if the delta's parent isn't a parent of the

2257

revision.

2260

revision.

2258

2261

2259

In addition to the delta policy, the ``deltabothparents`` argument

2262

In addition to the delta policy, the ``deltabothparents`` argument

2260

controls whether to compute deltas against both parents for merges.

2263

controls whether to compute deltas against both parents for merges.

2261

By default, the current default is used.

2264

By default, the current default is used.

2262

"""

2265

"""

2263

if deltareuse not in self.DELTAREUSEALL:

2266

if deltareuse not in self.DELTAREUSEALL:

2264

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2267

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2265

2268

2266

if len(destrevlog):

2269

if len(destrevlog):

2267

raise ValueError(_('destination revlog is not empty'))

2270

raise ValueError(_('destination revlog is not empty'))

2268

2271

2269

if getattr(self, 'filteredrevs', None):

2272

if getattr(self, 'filteredrevs', None):

2270

raise ValueError(_('source revlog has filtered revisions'))

2273

raise ValueError(_('source revlog has filtered revisions'))

2271

if getattr(destrevlog, 'filteredrevs', None):

2274

if getattr(destrevlog, 'filteredrevs', None):

2272

raise ValueError(_('destination revlog has filtered revisions'))

2275

raise ValueError(_('destination revlog has filtered revisions'))

2273

2276

2274

# lazydeltabase controls whether to reuse a cached delta, if possible.

2277

# lazydeltabase controls whether to reuse a cached delta, if possible.

2275

oldlazydeltabase = destrevlog._lazydeltabase

2278

oldlazydeltabase = destrevlog._lazydeltabase

2276

oldamd = destrevlog._deltabothparents

2279

oldamd = destrevlog._deltabothparents

2277

2280

2278

try:

2281

try:

2279

if deltareuse == self.DELTAREUSEALWAYS:

2282

if deltareuse == self.DELTAREUSEALWAYS:

2280

destrevlog._lazydeltabase = True

2283

destrevlog._lazydeltabase = True

2281

elif deltareuse == self.DELTAREUSESAMEREVS:

2284

elif deltareuse == self.DELTAREUSESAMEREVS:

2282

destrevlog._lazydeltabase = False

2285

destrevlog._lazydeltabase = False

2283

2286

2284

destrevlog._deltabothparents = deltabothparents or oldamd

2287

destrevlog._deltabothparents = deltabothparents or oldamd

2285

2288

2286

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2289

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2287

self.DELTAREUSESAMEREVS)

2290

self.DELTAREUSESAMEREVS)

2288

2291

2289

deltacomputer = deltautil.deltacomputer(destrevlog)

2292

deltacomputer = deltautil.deltacomputer(destrevlog)

2290

index = self.index

2293

index = self.index

2291

for rev in self:

2294

for rev in self:

2292

entry = index[rev]

2295

entry = index[rev]

2293

2296

2294

# Some classes override linkrev to take filtered revs into

2297

# Some classes override linkrev to take filtered revs into

2295

# account. Use raw entry from index.

2298

# account. Use raw entry from index.

2296

flags = entry[0] & 0xffff

2299

flags = entry[0] & 0xffff

2297

linkrev = entry[4]

2300

linkrev = entry[4]

2298

p1 = index[entry[5]][7]

2301

p1 = index[entry[5]][7]

2299

p2 = index[entry[6]][7]

2302

p2 = index[entry[6]][7]

2300

node = entry[7]

2303

node = entry[7]

2301

2304

2302

# (Possibly) reuse the delta from the revlog if allowed and

2305

# (Possibly) reuse the delta from the revlog if allowed and

2303

# the revlog chunk is a delta.

2306

# the revlog chunk is a delta.

2304

cachedelta = None

2307

cachedelta = None

2305

rawtext = None

2308

rawtext = None

2306

if populatecachedelta:

2309

if populatecachedelta:

2307

dp = self.deltaparent(rev)

2310

dp = self.deltaparent(rev)

2308

if dp != nullrev:

2311

if dp != nullrev:

2309

cachedelta = (dp, bytes(self._chunk(rev)))

2312

cachedelta = (dp, bytes(self._chunk(rev)))

2310

2313

2311

if not cachedelta:

2314

if not cachedelta:

2312

rawtext = self.revision(rev, raw=True)

2315

rawtext = self.revision(rev, raw=True)

2313

2316

2314

2317

2315

if deltareuse == self.DELTAREUSEFULLADD:

2318

if deltareuse == self.DELTAREUSEFULLADD:

2316

destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,

2319

destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,

2317

cachedelta=cachedelta,

2320

cachedelta=cachedelta,

2318

node=node, flags=flags,

2321

node=node, flags=flags,

2319

deltacomputer=deltacomputer)

2322

deltacomputer=deltacomputer)

2320

else:

2323

else:

2321

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2324

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2322

checkambig=False)

2325

checkambig=False)

2323

dfh = None

2326

dfh = None

2324

if not destrevlog._inline:

2327

if not destrevlog._inline:

2325

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2328

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2326

try:

2329

try:

2327

destrevlog._addrevision(node, rawtext, tr, linkrev, p1,

2330

destrevlog._addrevision(node, rawtext, tr, linkrev, p1,

2328

p2, flags, cachedelta, ifh, dfh,

2331

p2, flags, cachedelta, ifh, dfh,

2329

deltacomputer=deltacomputer)

2332

deltacomputer=deltacomputer)

2330

finally:

2333

finally:

2331

if dfh:

2334

if dfh:

2332

dfh.close()

2335

dfh.close()

2333

ifh.close()

2336

ifh.close()

2334

2337

2335

if addrevisioncb:

2338

if addrevisioncb:

2336

addrevisioncb(self, rev, node)

2339

addrevisioncb(self, rev, node)

2337

finally:

2340

finally:

2338

destrevlog._lazydeltabase = oldlazydeltabase

2341

destrevlog._lazydeltabase = oldlazydeltabase

2339

destrevlog._deltabothparents = oldamd

2342

destrevlog._deltabothparents = oldamd

2340

2343

2341

def censorrevision(self, node, tombstone=b''):

2344

def censorrevision(self, node, tombstone=b''):

2342

if (self.version & 0xFFFF) == REVLOGV0:

2345

if (self.version & 0xFFFF) == REVLOGV0:

2343

raise error.RevlogError(_('cannot censor with version %d revlogs') %

2346

raise error.RevlogError(_('cannot censor with version %d revlogs') %

2344

self.version)

2347

self.version)

2345

2348

2346

rev = self.rev(node)

2349

rev = self.rev(node)

2347

tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

2350

tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

2348

2351

2349

if len(tombstone) > self.rawsize(rev):

2352

if len(tombstone) > self.rawsize(rev):

2350

raise error.Abort(_('censor tombstone must be no longer than '

2353

raise error.Abort(_('censor tombstone must be no longer than '

2351

'censored data'))

2354

'censored data'))

2352

2355

2353

# Using two files instead of one makes it easy to rewrite entry-by-entry

2356

# Using two files instead of one makes it easy to rewrite entry-by-entry

2354

idxread = self.opener(self.indexfile, 'r')

2357

idxread = self.opener(self.indexfile, 'r')

2355

idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)

2358

idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)

2356

if self.version & FLAG_INLINE_DATA:

2359

if self.version & FLAG_INLINE_DATA:

2357

dataread, datawrite = idxread, idxwrite

2360

dataread, datawrite = idxread, idxwrite

2358

else:

2361

else:

2359

dataread = self.opener(self.datafile, 'r')

2362

dataread = self.opener(self.datafile, 'r')

2360

datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

2363

datawrite = self.opener(self.datafile, 'wb', atomictemp=True)

2361

2364

2362

# Copy all revlog data up to the entry to be censored.

2365

# Copy all revlog data up to the entry to be censored.

2363

offset = self.start(rev)

2366

offset = self.start(rev)

2364

2367

2365

for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):

2368

for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):

2366

idxwrite.write(chunk)

2369

idxwrite.write(chunk)

2367

for chunk in util.filechunkiter(dataread, limit=offset):

2370

for chunk in util.filechunkiter(dataread, limit=offset):

2368

datawrite.write(chunk)

2371

datawrite.write(chunk)

2369

2372

2370

def rewriteindex(r, newoffs, newdata=None):

2373

def rewriteindex(r, newoffs, newdata=None):

2371

"""Rewrite the index entry with a new data offset and new data.

2374

"""Rewrite the index entry with a new data offset and new data.

2372

2375

2373

The newdata argument, if given, is a tuple of three positive

2376

The newdata argument, if given, is a tuple of three positive

2374

integers: (new compressed, new uncompressed, added flag bits).

2377

integers: (new compressed, new uncompressed, added flag bits).

2375

"""

2378

"""

2376

offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]

2379

offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]

2377

flags = gettype(offlags)

2380

flags = gettype(offlags)

2378

if newdata:

2381

if newdata:

2379

comp, uncomp, nflags = newdata

2382

comp, uncomp, nflags = newdata

2380

flags |= nflags

2383

flags |= nflags

2381

offlags = offset_type(newoffs, flags)

2384

offlags = offset_type(newoffs, flags)

2382

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

2385

e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)

2383

idxwrite.write(self._io.packentry(e, None, self.version, r))

2386

idxwrite.write(self._io.packentry(e, None, self.version, r))

2384

idxread.seek(self._io.size, 1)

2387

idxread.seek(self._io.size, 1)

2385

2388

2386

def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):

2389

def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):

2387

"""Write the given fulltext with the given data offset.

2390

"""Write the given fulltext with the given data offset.

2388

2391

2389

Returns:

2392

Returns:

2390

The integer number of data bytes written, for tracking data

2393

The integer number of data bytes written, for tracking data

2391

offsets.

2394

offsets.

2392

"""

2395

"""

2393

flag, compdata = self.compress(data)

2396

flag, compdata = self.compress(data)

2394

newcomp = len(flag) + len(compdata)

2397

newcomp = len(flag) + len(compdata)

2395

rewriteindex(r, offs, (newcomp, len(data), nflags))

2398

rewriteindex(r, offs, (newcomp, len(data), nflags))

2396

datawrite.write(flag)

2399

datawrite.write(flag)

2397

datawrite.write(compdata)

2400

datawrite.write(compdata)

2398

dataread.seek(self.length(r), 1)

2401

dataread.seek(self.length(r), 1)

2399

return newcomp

2402

return newcomp

2400

2403

2401

# Rewrite censored entry with (padded) tombstone data.

2404

# Rewrite censored entry with (padded) tombstone data.

2402

pad = ' ' * (self.rawsize(rev) - len(tombstone))

2405

pad = ' ' * (self.rawsize(rev) - len(tombstone))

2403

offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

2406

offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)

2404

2407

2405

# Rewrite all following filelog revisions fixing up offsets and deltas.

2408

# Rewrite all following filelog revisions fixing up offsets and deltas.

2406

for srev in pycompat.xrange(rev + 1, len(self)):

2409

for srev in pycompat.xrange(rev + 1, len(self)):

2407

if rev in self.parentrevs(srev):

2410

if rev in self.parentrevs(srev):

2408

# Immediate children of censored node must be re-added as

2411

# Immediate children of censored node must be re-added as

2409

# fulltext.

2412

# fulltext.

2410

try:

2413

try:

2411

revdata = self.revision(srev)

2414

revdata = self.revision(srev)

2412

except error.CensoredNodeError as e:

2415

except error.CensoredNodeError as e:

2413

revdata = e.tombstone

2416

revdata = e.tombstone

2414

dlen = rewrite(srev, offset, revdata)

2417

dlen = rewrite(srev, offset, revdata)

2415

else:

2418

else:

2416

# Copy any other revision data verbatim after fixing up the

2419

# Copy any other revision data verbatim after fixing up the

2417

# offset.

2420

# offset.

2418

rewriteindex(srev, offset)

2421

rewriteindex(srev, offset)

2419

dlen = self.length(srev)

2422

dlen = self.length(srev)

2420

for chunk in util.filechunkiter(dataread, limit=dlen):

2423

for chunk in util.filechunkiter(dataread, limit=dlen):

2421

datawrite.write(chunk)

2424

datawrite.write(chunk)

2422

offset += dlen

2425

offset += dlen

2423

2426

2424

idxread.close()

2427

idxread.close()

2425

idxwrite.close()

2428

idxwrite.close()

2426

if dataread is not idxread:

2429

if dataread is not idxread:

2427

dataread.close()

2430

dataread.close()

2428

datawrite.close()

2431

datawrite.close()

2429

2432

2430

def verifyintegrity(self, state):

2433

def verifyintegrity(self, state):

2431

"""Verifies the integrity of the revlog.

2434

"""Verifies the integrity of the revlog.

2432

2435

2433

Yields ``revlogproblem`` instances describing problems that are

2436

Yields ``revlogproblem`` instances describing problems that are

2434

found.

2437

found.

2435

"""

2438

"""

2436

dd, di = self.checksize()

2439

dd, di = self.checksize()

2437

if dd:

2440

if dd:

2438

yield revlogproblem(error=_('data length off by %d bytes') % dd)

2441

yield revlogproblem(error=_('data length off by %d bytes') % dd)

2439

if di:

2442

if di:

2440

yield revlogproblem(error=_('index contains %d extra bytes') % di)

2443

yield revlogproblem(error=_('index contains %d extra bytes') % di)

2441

2444

2442

version = self.version & 0xFFFF

2445

version = self.version & 0xFFFF

2443

2446

2444

# The verifier tells us what version revlog we should be.

2447

# The verifier tells us what version revlog we should be.

2445

if version != state['expectedversion']:

2448

if version != state['expectedversion']:

2446

yield revlogproblem(

2449

yield revlogproblem(

2447

warning=_("warning: '%s' uses revlog format %d; expected %d") %

2450

warning=_("warning: '%s' uses revlog format %d; expected %d") %

2448

(self.indexfile, version, state['expectedversion']))

2451

(self.indexfile, version, state['expectedversion']))

2449

2452

2450

state['skipread'] = set()

2453

state['skipread'] = set()

2451

2454

2452

for rev in self:

2455

for rev in self:

2453

node = self.node(rev)

2456

node = self.node(rev)

2454

2457

2455

# Verify contents. 4 cases to care about:

2458

# Verify contents. 4 cases to care about:

2456

#

2459

#

2457

# common: the most common case

2460

# common: the most common case

2458

# rename: with a rename

2461

# rename: with a rename

2459

# meta: file content starts with b'\1\n', the metadata

2462

# meta: file content starts with b'\1\n', the metadata

2460

# header defined in filelog.py, but without a rename

2463

# header defined in filelog.py, but without a rename

2461

# ext: content stored externally

2464

# ext: content stored externally

2462

#

2465

#

2463

# More formally, their differences are shown below:

2466

# More formally, their differences are shown below:

2464

#

2467

#

2465

# | common | rename | meta | ext

2468

# | common | rename | meta | ext

2466

# -------------------------------------------------------

2469

# -------------------------------------------------------

2467

# flags() | 0 | 0 | 0 | not 0

2470

# flags() | 0 | 0 | 0 | not 0

2468

# renamed() | False | True | False | ?

2471

# renamed() | False | True | False | ?

2469

# rawtext[0:2]=='\1\n'| False | True | True | ?

2472

# rawtext[0:2]=='\1\n'| False | True | True | ?

2470

#

2473

#

2471

# "rawtext" means the raw text stored in revlog data, which

2474

# "rawtext" means the raw text stored in revlog data, which

2472

# could be retrieved by "revision(rev, raw=True)". "text"

2475

# could be retrieved by "revision(rev, raw=True)". "text"

2473

# mentioned below is "revision(rev, raw=False)".

2476

# mentioned below is "revision(rev, raw=False)".

2474

#

2477

#

2475

# There are 3 different lengths stored physically:

2478

# There are 3 different lengths stored physically:

2476

# 1. L1: rawsize, stored in revlog index

2479

# 1. L1: rawsize, stored in revlog index

2477

# 2. L2: len(rawtext), stored in revlog data

2480

# 2. L2: len(rawtext), stored in revlog data

2478

# 3. L3: len(text), stored in revlog data if flags==0, or

2481

# 3. L3: len(text), stored in revlog data if flags==0, or

2479

# possibly somewhere else if flags!=0

2482

# possibly somewhere else if flags!=0

2480

#

2483

#

2481

# L1 should be equal to L2. L3 could be different from them.

2484

# L1 should be equal to L2. L3 could be different from them.

2482

# "text" may or may not affect commit hash depending on flag

2485

# "text" may or may not affect commit hash depending on flag

2483

# processors (see revlog.addflagprocessor).

2486

# processors (see revlog.addflagprocessor).

2484

#

2487

#

2485

# | common | rename | meta | ext

2488

# | common | rename | meta | ext

2486

# -------------------------------------------------

2489

# -------------------------------------------------

2487

# rawsize() | L1 | L1 | L1 | L1

2490

# rawsize() | L1 | L1 | L1 | L1

2488

# size() | L1 | L2-LM | L1(*) | L1 (?)

2491

# size() | L1 | L2-LM | L1(*) | L1 (?)

2489

# len(rawtext) | L2 | L2 | L2 | L2

2492

# len(rawtext) | L2 | L2 | L2 | L2

2490

# len(text) | L2 | L2 | L2 | L3

2493

# len(text) | L2 | L2 | L2 | L3

2491

# len(read()) | L2 | L2-LM | L2-LM | L3 (?)

2494

# len(read()) | L2 | L2-LM | L2-LM | L3 (?)

2492

#

2495

#

2493

# LM: length of metadata, depending on rawtext

2496

# LM: length of metadata, depending on rawtext

2494

# (*): not ideal, see comment in filelog.size

2497

# (*): not ideal, see comment in filelog.size

2495

# (?): could be "- len(meta)" if the resolved content has

2498

# (?): could be "- len(meta)" if the resolved content has

2496

# rename metadata

2499

# rename metadata

2497

#

2500

#

2498

# Checks needed to be done:

2501

# Checks needed to be done:

2499

# 1. length check: L1 == L2, in all cases.

2502

# 1. length check: L1 == L2, in all cases.

2500

# 2. hash check: depending on flag processor, we may need to

2503

# 2. hash check: depending on flag processor, we may need to

2501

# use either "text" (external), or "rawtext" (in revlog).

2504

# use either "text" (external), or "rawtext" (in revlog).

2502

2505

2503

try:

2506

try:

2504

skipflags = state.get('skipflags', 0)

2507

skipflags = state.get('skipflags', 0)

2505

if skipflags:

2508

if skipflags:

2506

skipflags &= self.flags(rev)

2509

skipflags &= self.flags(rev)

2507

2510

2508

if skipflags:

2511

if skipflags:

2509

state['skipread'].add(node)

2512

state['skipread'].add(node)

2510

else:

2513

else:

2511

# Side-effect: read content and verify hash.

2514

# Side-effect: read content and verify hash.

2512

self.revision(node)

2515

self.revision(node)

2513

2516

2514

l1 = self.rawsize(rev)

2517

l1 = self.rawsize(rev)

2515

l2 = len(self.revision(node, raw=True))

2518

l2 = len(self.revision(node, raw=True))

2516

2519

2517

if l1 != l2:

2520

if l1 != l2:

2518

yield revlogproblem(

2521

yield revlogproblem(

2519

error=_('unpacked size is %d, %d expected') % (l2, l1),

2522

error=_('unpacked size is %d, %d expected') % (l2, l1),

2520

node=node)

2523

node=node)

2521

2524

2522

except error.CensoredNodeError:

2525

except error.CensoredNodeError:

2523

if state['erroroncensored']:

2526

if state['erroroncensored']:

2524

yield revlogproblem(error=_('censored file data'),

2527

yield revlogproblem(error=_('censored file data'),

2525

node=node)

2528

node=node)

2526

state['skipread'].add(node)

2529

state['skipread'].add(node)

2527

except Exception as e:

2530

except Exception as e:

2528

yield revlogproblem(

2531

yield revlogproblem(

2529

error=_('unpacking %s: %s') % (short(node),

2532

error=_('unpacking %s: %s') % (short(node),

2530

stringutil.forcebytestr(e)),

2533

stringutil.forcebytestr(e)),

2531

node=node)

2534

node=node)

2532

state['skipread'].add(node)

2535

state['skipread'].add(node)

2533

2536

2534

def storageinfo(self, exclusivefiles=False, sharedfiles=False,

2537

def storageinfo(self, exclusivefiles=False, sharedfiles=False,

2535

revisionscount=False, trackedsize=False,

2538

revisionscount=False, trackedsize=False,

2536

storedsize=False):

2539

storedsize=False):

2537

d = {}

2540

d = {}

2538

2541

2539

if exclusivefiles:

2542

if exclusivefiles:

2540

d['exclusivefiles'] = [(self.opener, self.indexfile)]

2543

d['exclusivefiles'] = [(self.opener, self.indexfile)]

2541

if not self._inline:

2544

if not self._inline:

2542

d['exclusivefiles'].append((self.opener, self.datafile))

2545

d['exclusivefiles'].append((self.opener, self.datafile))

2543

2546

2544

if sharedfiles:

2547

if sharedfiles:

2545

d['sharedfiles'] = []

2548

d['sharedfiles'] = []

2546

2549

2547

if revisionscount:

2550

if revisionscount:

2548

d['revisionscount'] = len(self)

2551

d['revisionscount'] = len(self)

2549

2552

2550

if trackedsize:

2553

if trackedsize:

2551

d['trackedsize'] = sum(map(self.rawsize, iter(self)))

2554

d['trackedsize'] = sum(map(self.rawsize, iter(self)))

2552

2555

2553

if storedsize:

2556

if storedsize:

2554

d['storedsize'] = sum(self.opener.stat(path).st_size

2557

d['storedsize'] = sum(self.opener.stat(path).st_size

2555

for path in self.files())

2558

for path in self.files())

2556

2559

2557

return d

2560

return d

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlog.py - storage back-end for mercurial
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import collections
             import contextlib
             import errno
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullhex,
                 nullid,
                 nullrev,
                 short,
                 wdirfilenodeids,
                 wdirhex,
                 wdirid,
                 wdirrev,
             )
             from .i18n import _
             from .revlogutils.constants import (
                 FLAG_GENERALDELTA,
                 FLAG_INLINE_DATA,
                 REVIDX_DEFAULT_FLAGS,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
                 REVIDX_FLAGS_ORDER,
                 REVIDX_ISCENSORED,
                 REVIDX_KNOWN_FLAGS,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
                 REVLOGV0,
                 REVLOGV1,
                 REVLOGV1_FLAGS,
                 REVLOGV2,
                 REVLOGV2_FLAGS,
                 REVLOG_DEFAULT_FLAGS,
                 REVLOG_DEFAULT_FORMAT,
                 REVLOG_DEFAULT_VERSION,
             )
             from .thirdparty import (
                 attr,
             )
             from . import (
                 ancestor,
                 dagop,
                 error,
                 mdiff,
                 policy,
                 pycompat,
                 repository,
                 templatefilters,
                 util,
             )
             from .revlogutils import (
                 deltas as deltautil,
             )
             from .utils import (
                 interfaceutil,
                 storageutil,
                 stringutil,
             )
             # blanked usage of all the name to prevent pyflakes constraints
             # We need these name available in the module for extensions.
             REVLOGV0
             REVLOGV1
             REVLOGV2
             FLAG_INLINE_DATA
             FLAG_GENERALDELTA
             REVLOG_DEFAULT_FLAGS
             REVLOG_DEFAULT_FORMAT
             REVLOG_DEFAULT_VERSION
             REVLOGV1_FLAGS
             REVLOGV2_FLAGS
             REVIDX_ISCENSORED
             REVIDX_ELLIPSIS
             REVIDX_EXTSTORED
             REVIDX_DEFAULT_FLAGS
             REVIDX_FLAGS_ORDER
             REVIDX_KNOWN_FLAGS
             REVIDX_RAWTEXT_CHANGING_FLAGS
             parsers = policy.importmod(r'parsers')
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # max size of revlog with inline data
             _maxinline = 131072
             _chunksize = 1048576
             # Store flag processors (cf. 'addflagprocessor()' to register)
             _flagprocessors = {
                 REVIDX_ISCENSORED: None,
             }
             # Flag processors for REVIDX_ELLIPSIS.
             def ellipsisreadprocessor(rl, text):
                 return text, False
             def ellipsiswriteprocessor(rl, text):
                 return text, False
             def ellipsisrawprocessor(rl, text):
                 return False
             ellipsisprocessor = (
                 ellipsisreadprocessor,
                 ellipsiswriteprocessor,
                 ellipsisrawprocessor,
             )
             def addflagprocessor(flag, processor):
                 """Register a flag processor on a revision data flag.
                 Invariant:
                 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
                   and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
                 - Only one flag processor can be registered on a specific flag.
                 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
                   following signatures:
                       - (read)  f(self, rawtext) -> text, bool
                       - (write) f(self, text) -> rawtext, bool
                       - (raw)   f(self, rawtext) -> bool
                   "text" is presented to the user. "rawtext" is stored in revlog data, not
                   directly visible to the user.
                   The boolean returned by these transforms is used to determine whether
                   the returned text can be used for hash integrity checking. For example,
                   if "write" returns False, then "text" is used to generate hash. If
                   "write" returns True, that basically means "rawtext" returned by "write"
                   should be used to generate hash. Usually, "write" and "read" return
                   different booleans. And "raw" returns a same boolean as "write".
                   Note: The 'raw' transform is used for changegroup generation and in some
                   debug commands. In this case the transform only indicates whether the
                   contents can be used for hash integrity checks.
                 """
                 if not flag & REVIDX_KNOWN_FLAGS:
                     msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
                     raise error.ProgrammingError(msg)
                 if flag not in REVIDX_FLAGS_ORDER:
                     msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
                     raise error.ProgrammingError(msg)
                 if flag in _flagprocessors:
                     msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
                     raise error.Abort(msg)
                 _flagprocessors[flag] = processor
             def getoffset(q):
                 return int(q >> 16)
             def gettype(q):
                 return int(q & 0xFFFF)
             def offset_type(offset, type):
                 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
                     raise ValueError('unknown revlog index flags')
                 return int(int(offset) << 16 | type)
             @attr.s(slots=True, frozen=True)
             class _revisioninfo(object):
                 """Information about a revision that allows building its fulltext
                 node:       expected hash of the revision
                 p1, p2:     parent revs of the revision
                 btext:      built text cache consisting of a one-element list
                 cachedelta: (baserev, uncompressed_delta) or None
                 flags:      flags associated to the revision storage
                 One of btext[0] or cachedelta must be set.
                 """
                 node = attr.ib()
                 p1 = attr.ib()
                 p2 = attr.ib()
                 btext = attr.ib()
                 textlen = attr.ib()
                 cachedelta = attr.ib()
                 flags = attr.ib()
             @interfaceutil.implementer(repository.irevisiondelta)
             @attr.s(slots=True)
             class revlogrevisiondelta(object):
                 node = attr.ib()
                 p1node = attr.ib()
                 p2node = attr.ib()
                 basenode = attr.ib()
                 flags = attr.ib()
                 baserevisionsize = attr.ib()
                 revision = attr.ib()
                 delta = attr.ib()
                 linknode = attr.ib(default=None)
             @interfaceutil.implementer(repository.iverifyproblem)
             @attr.s(frozen=True)
             class revlogproblem(object):
                 warning = attr.ib(default=None)
                 error = attr.ib(default=None)
                 node = attr.ib(default=None)
             # index v0:
             #  4 bytes: offset
             #  4 bytes: compressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             # 20 bytes: parent 1 nodeid
             # 20 bytes: parent 2 nodeid
             # 20 bytes: nodeid
             indexformatv0 = struct.Struct(">4l20s20s20s")
             indexformatv0_pack = indexformatv0.pack
             indexformatv0_unpack = indexformatv0.unpack
             class revlogoldindex(list):
                 def __getitem__(self, i):
                     if i == -1:
                         return (0, 0, 0, -1, -1, -1, -1, nullid)
                     return list.__getitem__(self, i)
             class revlogoldio(object):
                 def __init__(self):
                     self.size = indexformatv0.size
                 def parseindex(self, data, inline):
                     s = self.size
                     index = []
                     nodemap = {nullid: nullrev}
                     n = off = 0
                     l = len(data)
                     while off + s <= l:
                         cur = data[off:off + s]
                         off += s
                         e = indexformatv0_unpack(cur)
                         # transform to revlogv1 format
                         e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                               nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
                         index.append(e2)
                         nodemap[e[6]] = n
                         n += 1
                     return revlogoldindex(index), nodemap, None
                 def packentry(self, entry, node, version, rev):
                     if gettype(entry[0]):
                         raise error.RevlogError(_('index entry flags need revlog '
                                                   'version 1'))
                     e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                           node(entry[5]), node(entry[6]), entry[7])
                     return indexformatv0_pack(*e2)
             # index ng:
             #  6 bytes: offset
             #  2 bytes: flags
             #  4 bytes: compressed length
             #  4 bytes: uncompressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             #  4 bytes: parent 1 rev
             #  4 bytes: parent 2 rev
             # 32 bytes: nodeid
             indexformatng = struct.Struct(">Qiiiiii20s12x")
             indexformatng_pack = indexformatng.pack
             versionformat = struct.Struct(">I")
             versionformat_pack = versionformat.pack
             versionformat_unpack = versionformat.unpack
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7fffffff
             class revlogio(object):
                 def __init__(self):
                     self.size = indexformatng.size
                 def parseindex(self, data, inline):
                     # call the C implementation to parse the index data
                     index, cache = parsers.parse_index2(data, inline)
                     return index, getattr(index, 'nodemap', None), cache
                 def packentry(self, entry, node, version, rev):
                     p = indexformatng_pack(*entry)
                     if rev == 0:
                         p = versionformat_pack(version) + p[4:]
                     return p
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 If mmaplargeindex is True, and an mmapindexthreshold is set, the
                 index will be mmapped rather than read if it is larger than the
                 configured threshold.
                 If censorable is True, the revlog can have censored revisions.
                 """
                 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                              mmaplargeindex=False, censorable=False):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = datafile or (indexfile[:-2] + ".d")
                     self.opener = opener
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     self._censorable = censorable
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._revisioncache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, '')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._deltabothparents = True
                     self.index = []
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Mapping of revision integer to full node.
                     self._nodecache = {nullid: nullrev}
                     self._nodepos = None
                     self._compengine = 'zlib'
                     self._maxdeltachainspan = -1
                     self._withsparseread = False
                     self._sparserevlog = False
                     self._srdensitythreshold = 0.50
                     self._srmingapsize = 262144
                     # Make copy of flag processors so each revlog instance can support
                     # custom flags.
                     self._flagprocessors = dict(_flagprocessors)
                     mmapindexthreshold = None
                     v = REVLOG_DEFAULT_VERSION
                     opts = getattr(opener, 'options', None)
                     if opts is not None:
                         if 'revlogv2' in opts:
                             # version 2 revlogs always use generaldelta.
                             v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
                         elif 'revlogv1' in opts:
                             if 'generaldelta' in opts:
                                 v |= FLAG_GENERALDELTA
                         else:
                             v = 0
                         if 'chunkcachesize' in opts:
                             self._chunkcachesize = opts['chunkcachesize']
                         if 'maxchainlen' in opts:
                             self._maxchainlen = opts['maxchainlen']
                         if 'deltabothparents' in opts:
                             self._deltabothparents = opts['deltabothparents']
                         self._lazydeltabase = bool(opts.get('lazydeltabase', False))
                         if 'compengine' in opts:
                             self._compengine = opts['compengine']
                         if 'maxdeltachainspan' in opts:
                             self._maxdeltachainspan = opts['maxdeltachainspan']
                         if mmaplargeindex and 'mmapindexthreshold' in opts:
                             mmapindexthreshold = opts['mmapindexthreshold']
                         self._sparserevlog = bool(opts.get('sparse-revlog', False))
                         withsparseread = bool(opts.get('with-sparse-read', False))
                         # sparse-revlog forces sparse-read
                         self._withsparseread = self._sparserevlog or withsparseread
                         if 'sparse-read-density-threshold' in opts:
                             self._srdensitythreshold = opts['sparse-read-density-threshold']
                         if 'sparse-read-min-gap-size' in opts:
                             self._srmingapsize = opts['sparse-read-min-gap-size']
                         if opts.get('enableellipsis'):
                             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
                     if self._chunkcachesize <= 0:
                         raise error.RevlogError(_('revlog chunk cache size %r is not '
                                                   'greater than 0') % self._chunkcachesize)
                     elif self._chunkcachesize & (self._chunkcachesize - 1):
                         raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                                   'power of 2') % self._chunkcachesize)
+                    self._loadindex(v, mmapindexthreshold)
+                def _loadindex(self, v, mmapindexthreshold):
                     indexdata = ''
                     self._initempty = True
                     try:
                         with self._indexfp() as f:
                             if (mmapindexthreshold is not None and
                                 self.opener.fstat(f).st_size >= mmapindexthreshold):
                                 indexdata = util.buffer(util.mmapread(f))
                             else:
                                 indexdata = f.read()
                         if len(indexdata) > 0:
                             v = versionformat_unpack(indexdata[:4])[0]
                             self._initempty = False
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                     self.version = v
                     self._inline = v & FLAG_INLINE_DATA
                     self._generaldelta = v & FLAG_GENERALDELTA
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == REVLOGV0:
                         if flags:
                             raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                                       'revlog %s') %
                                                     (flags >> 16, fmt, self.indexfile))
                     elif fmt == REVLOGV1:
                         if flags & ~REVLOGV1_FLAGS:
                             raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                                       'revlog %s') %
                                                     (flags >> 16, fmt, self.indexfile))
                     elif fmt == REVLOGV2:
                         if flags & ~REVLOGV2_FLAGS:
                             raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                                       'revlog %s') %
                                                     (flags >> 16, fmt, self.indexfile))
                     else:
                         raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                                 (fmt, self.indexfile))
                     self._storedeltachains = True
                     self._io = revlogio()
                     if self.version == REVLOGV0:
                         self._io = revlogoldio()
                     try:
                         d = self._io.parseindex(indexdata, self._inline)
                     except (ValueError, IndexError):
                         raise error.RevlogError(_("index %s is corrupted") %
                                                 self.indexfile)
                     self.index, nodemap, self._chunkcache = d
                     if nodemap is not None:
                         self.nodemap = self._nodecache = nodemap
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = {}
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def _compressor(self):
                     return util.compengines[self._compengine].revlogcompressor()
                 def _indexfp(self, mode='r'):
                     """file object for the revlog's index file"""
                     args = {r'mode': mode}
                     if mode != 'r':
                         args[r'checkambig'] = self._checkambig
                     if mode == 'w':
                         args[r'atomictemp'] = True
                     return self.opener(self.indexfile, **args)
                 def _datafp(self, mode='r'):
                     """file object for the revlog's data file"""
                     return self.opener(self.datafile, mode=mode)
                 @contextlib.contextmanager
                 def _datareadfp(self, existingfp=None):
                     """file object suitable to read data"""
                     if existingfp is not None:
                         yield existingfp
                     else:
                         if self._inline:
                             func = self._indexfp
                         else:
                             func = self._datafp
                         with func() as fp:
                             yield fp
                 def tip(self):
                     return self.node(len(self.index) - 1)
                 def __contains__(self, rev):
                     return 0 <= rev < len(self)
                 def __len__(self):
                     return len(self.index)
                 def __iter__(self):
                     return iter(pycompat.xrange(len(self)))
                 def revs(self, start=0, stop=None):
                     """iterate over all rev in this revlog (from start to stop)"""
                     return storageutil.iterrevs(len(self), start=start, stop=stop)
                 @util.propertycache
                 def nodemap(self):
                     if self.index:
                         # populate mapping down to the initial node
                         node0 = self.index[0][7]  # get around changelog filtering
                         self.rev(node0)
                     return self._nodecache
                 def hasnode(self, node):
                     try:
                         self.rev(node)
                         return True
                     except KeyError:
                         return False
                 def candelta(self, baserev, rev):
                     """whether two revisions (baserev, rev) can be delta-ed or not"""
                     # Disable delta if either rev requires a content-changing flag
                     # processor (ex. LFS). This is because such flag processor can alter
                     # the rawtext content that the delta will be based on, and two clients
                     # could have a same revlog node with different flags (i.e. different
                     # rawtext contents) and the delta could be incompatible.
                     if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
                         or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
                         return False
                     return True
                 def clearcaches(self):
                     self._revisioncache = None
                     self._chainbasecache.clear()
                     self._chunkcache = (0, '')
                     self._pcache = {}
                     try:
                         self._nodecache.clearcaches()
                     except AttributeError:
                         self._nodecache = {nullid: nullrev}
                         self._nodepos = None
                 def rev(self, node):
                     try:
                         return self._nodecache[node]
                     except TypeError:
                         raise
                     except error.RevlogError:
                         # parsers.c radix tree lookup failed
                         if node == wdirid or node in wdirfilenodeids:
                             raise error.WdirUnsupported
                         raise error.LookupError(node, self.indexfile, _('no node'))
                     except KeyError:
                         # pure python cache lookup failed
                         n = self._nodecache
                         i = self.index
                         p = self._nodepos
                         if p is None:
                             p = len(i) - 1
                         else:
                             assert p < len(i)
                         for r in pycompat.xrange(p, -1, -1):
                             v = i[r][7]
                             n[v] = r
                             if v == node:
                                 self._nodepos = r - 1
                                 return r
                         if node == wdirid or node in wdirfilenodeids:
                             raise error.WdirUnsupported
                         raise error.LookupError(node, self.indexfile, _('no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     return int(self.index[rev][0] >> 16)
                 def flags(self, rev):
                     return self.index[rev][0] & 0xFFFF
                 def length(self, rev):
                     return self.index[rev][1]
                 def rawsize(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     l = self.index[rev][2]
                     if l >= 0:
                         return l
                     t = self.revision(rev, raw=True)
                     return len(t)
                 def size(self, rev):
                     """length of non-raw text (processed by a "read" flag processor)"""
                     # fast path: if no "read" flag processor could change the content,
                     # size is rawsize. note: ELLIPSIS is known to not change the content.
                     flags = self.flags(rev)
                     if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
                         return self.rawsize(rev)
                     return len(self.revision(rev, raw=False))
                 def chainbase(self, rev):
                     base = self._chainbasecache.get(rev)
                     if base is not None:
                         return base
                     index = self.index
                     iterrev = rev
                     base = index[iterrev][3]
                     while base != iterrev:
                         iterrev = base
                         base = index[iterrev][3]
                     self._chainbasecache[rev] = base
                     return base
                 def linkrev(self, rev):
                     return self.index[rev][4]
                 def parentrevs(self, rev):
                     try:
                         entry = self.index[rev]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                     return entry[5], entry[6]
                 def node(self, rev):
                     try:
                         return self.index[rev][7]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                 # Derived from index values.
                 def end(self, rev):
                     return self.start(rev) + self.length(rev)
                 def parents(self, node):
                     i = self.index
                     d = i[self.rev(node)]
                     return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
                 def chainlen(self, rev):
                     return self._chaininfo(rev)[0]
                 def _chaininfo(self, rev):
                     chaininfocache = self._chaininfocache
                     if rev in chaininfocache:
                         return chaininfocache[rev]
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     clen = 0
                     compresseddeltalen = 0
                     while iterrev != e[3]:
                         clen += 1
                         compresseddeltalen += e[1]
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         if iterrev in chaininfocache:
                             t = chaininfocache[iterrev]
                             clen += t[0]
                             compresseddeltalen += t[1]
                             break
                         e = index[iterrev]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         compresseddeltalen += e[1]
                     r = (clen, compresseddeltalen)
                     chaininfocache[rev] = r
                     return r
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.
                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.
                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     # Try C implementation.
                     try:
                         return self.index.deltachain(rev, stoprev, self._generaldelta)
                     except AttributeError:
                         pass
                     chain = []
                     # Alias to prevent attribute lookup in tight loop.
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     while iterrev != e[3] and iterrev != stoprev:
                         chain.append(iterrev)
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         e = index[iterrev]
                     if iterrev == stoprev:
                         stopped = True
                     else:
                         chain.append(iterrev)
                         stopped = False
                     chain.reverse()
                     return chain, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse topological order.
                     Does not generate revs lower than stoprev.
                     See the documentation for ancestor.lazyancestors for more details."""
                     return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                                   inclusive=inclusive)
                 def descendants(self, revs):
                     return dagop.descendantrevs(revs, self.revs, self.parentrevs)
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:
                       ::common, (::heads) - (::common)
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     # we want the ancestors, but inclusive
                     class lazyset(object):
                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues
                         def __contains__(self, value):
                             return value in self.addedvalues or value in self.lazyvalues
                         def __iter__(self):
                             added = self.addedvalues
                             for r in added:
                                 yield r
                             for r in self.lazyvalues:
                                 if not r in added:
                                     yield r
                         def add(self, value):
                             self.addedvalues.add(value)
                         def update(self, values):
                             self.addedvalues.update(values)
                     has = lazyset(self.ancestors(common))
                     has.add(nullrev)
                     has.update(common)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     visit = collections.deque(r for r in heads if r not in has)
                     while visit:
                         r = visit.popleft()
                         if r in missing:
                             continue
                         else:
                             missing.add(r)
                             for p in self.parentrevs(r):
                                 if p not in has:
                                     visit.append(p)
                     missing = list(missing)
                     missing.sort()
                     return has, [self.node(miss) for miss in missing]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common. This is an ancestor.incrementalmissingancestors
                     object.
                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     if common is None:
                         common = [nullrev]
                     return ancestor.incrementalmissingancestors(self.parentrevs, common)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.
                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullrev]
                     if heads is None:
                         heads = self.headrevs()
                     inc = self.incrementalmissingrevs(common=common)
                     return inc.missingancestors(heads)
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.
                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     inc = self.incrementalmissingrevs(common=common)
                     return [self.node(r) for r in inc.missingancestors(heads)]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.
                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:
 . N is a descendant of some node in 'roots'
 . N is an ancestor of some node in 'heads'
                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.
                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.
                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in self], [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n) # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [head for head, flag in heads.iteritems() if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self):
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         return self._headrevs()
                 def computephases(self, roots):
                     return self.index.computephasesmapsets(roots)
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     if stop is specified, it will consider all the revs from stop
                     as if they had no children
                     """
                     if start is None and stop is None:
                         if not len(self):
                             return [nullid]
                         return [self.node(r) for r in self.headrevs()]
                     if start is None:
                         start = nullrev
                     else:
                         start = self.rev(start)
                     stoprevs = set(self.rev(n) for n in stop or [])
                     revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                                 stoprevs=stoprevs)
                     return [self.node(rev) for rev in revs]
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in self.revs(start=p + 1):
                         prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
                         if prevs:
                             for pr in prevs:
                                 if pr == p:
                                     c.append(self.node(r))
                         elif p == nullrev:
                             c.append(self.node(r))
                     return c
                 def commonancestorsheads(self, a, b):
                     """calculate all the heads of the common ancestors of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     ancs = self._commonancestorsheads(a, b)
                     return pycompat.maplist(self.node, ancs)
                 def _commonancestorsheads(self, *revs):
                     """calculate all the heads of the common ancestors of revs"""
                     try:
                         ancs = self.index.commonancestorsheads(*revs)
                     except (AttributeError, OverflowError): # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
                     return ancs
                 def isancestor(self, a, b):
                     """return True if node a is an ancestor of node b
                     A revision is considered an ancestor of itself."""
                     a, b = self.rev(a), self.rev(b)
                     return self.isancestorrev(a, b)
                 def isancestorrev(self, a, b):
                     """return True if revision a is an ancestor of revision b
                     A revision is considered an ancestor of itself.
                     The implementation of this is trivial but the use of
                     commonancestorsheads is not."""
                     if a == nullrev:
                         return True
                     elif a == b:
                         return True
                     elif a > b:
                         return False
                     return a in self._commonancestorsheads(a, b)
                 def ancestor(self, a, b):
                     """calculate the "best" common ancestor of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.ancestors(a, b)
                     except (AttributeError, OverflowError):
                         ancs = ancestor.ancestors(self.parentrevs, a, b)
                     if ancs:
                         # choose a consistent winner when there's a tie
                         return min(map(self.node, ancs))
                     return nullid
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == 20:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node) # quick search the index
                             return node
                         except error.LookupError:
                             pass # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if "%d" % rev != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 40:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (TypeError, error.LookupError):
                             pass
                 def _partialmatch(self, id):
                     # we don't care wdirfilenodeids as they should be always full hash
                     maybewdir = wdirhex.startswith(id)
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             if maybewdir:
                                 # single 'ff...' match in radix tree, ambiguous with wdir
                                 raise error.RevlogError
                             return partial
                         if maybewdir:
                             # no 'ff...' match in radix tree, wdir identified
                             raise error.WdirUnsupported
                         return None
                     except error.RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             raise error.AmbiguousPrefixLookupError(
                                 id, self.indexfile, _('ambiguous identifier'))
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) <= 40:
                         try:
                             # hex(node)[:...]
                             l = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[:l * 2])
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             nl = [n for n in nl if hex(n).startswith(id) and
                                   self.hasnode(n)]
                             if nullhex.startswith(id):
                                 nl.append(nullid)
                             if len(nl) > 0:
                                 if len(nl) == 1 and not maybewdir:
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise error.AmbiguousPrefixLookupError(
                                     id, self.indexfile, _('ambiguous identifier'))
                             if maybewdir:
                                 raise error.WdirUnsupported
                             return None
                         except TypeError:
                             pass
                 def lookup(self, id):
                     """locate a node based on:
                         - revision number or str(revision number)
                         - nodeid or subset of hex nodeid
                     """
                     n = self._match(id)
                     if n is not None:
                         return n
                     n = self._partialmatch(id)
                     if n:
                         return n
                     raise error.LookupError(id, self.indexfile, _('no match found'))
                 def shortest(self, node, minlength=1):
                     """Find the shortest unambiguous prefix that matches node."""
                     def isvalid(prefix):
                         try:
                             node = self._partialmatch(prefix)
                         except error.AmbiguousPrefixLookupError:
                             return False
                         except error.WdirUnsupported:
                             # single 'ff...' match
                             return True
                         if node is None:
                             raise error.LookupError(node, self.indexfile, _('no node'))
                         return True
                     def maybewdir(prefix):
                         return all(c == 'f' for c in prefix)
                     hexnode = hex(node)
                     def disambiguate(hexnode, minlength):
                         """Disambiguate against wdirid."""
                         for length in range(minlength, 41):
                             prefix = hexnode[:length]
                             if not maybewdir(prefix):
                                 return prefix
                     if not getattr(self, 'filteredrevs', None):
                         try:
                             length = max(self.index.shortest(node), minlength)
                             return disambiguate(hexnode, length)
                         except error.RevlogError:
                             if node != wdirid:
                                 raise error.LookupError(node, self.indexfile, _('no node'))
                         except AttributeError:
                             # Fall through to pure code
                             pass
                     if node == wdirid:
                         for length in range(minlength, 41):
                             prefix = hexnode[:length]
                             if isvalid(prefix):
                                 return prefix
                     for length in range(minlength, 41):
                         prefix = hexnode[:length]
                         if isvalid(prefix):
                             return disambiguate(hexnode, length)
                 def cmp(self, node, text):
                     """compare text with a given file revision
                     returns True if text is different than what is stored.
                     """
                     p1, p2 = self.parents(node)
                     return storageutil.hashrevisionsha1(text, p1, p2) != node
                 def _cachesegment(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _readsegment(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     Returns a str or buffer of raw byte data.
                     """
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     cachesize = self._chunkcachesize
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                                   - realoffset)
                     with self._datareadfp(df) as df:
                         df.seek(realoffset)
                         d = df.read(reallength)
                     self._cachesegment(realoffset, d)
                     if offset != realoffset or reallength != length:
                         return util.buffer(d, offset - realoffset, length)
                     return d
                 def _getsegment(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._readsegment(offset, length, df=df)
                 def _getsegmentforrevs(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self._io.size
                         end += (endrev + 1) * self._io.size
                     length = end - start
                     return start, self._getsegment(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Obtain a single decompressed chunk for a revision.
                     Accepts an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will not
                     be preserved.
                     Returns a str holding uncompressed data for the requested revision.
                     """
                     return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
                 def _chunks(self, revs, df=None, targetsize=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     """
                     if not revs:
                         return []
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self._io.size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     if not self._withsparseread:
                         slicedchunks = (revs,)
                     else:
                         slicedchunks = deltautil.slicechunk(self, revs,
                                                             targetsize=targetsize)
                     for revschunk in slicedchunks:
                         firstrev = revschunk[0]
                         # Skip trailing revisions with empty diff
                         for lastrev in revschunk[::-1]:
                             if length(lastrev) != 0:
                                 break
                         try:
                             offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
                         except OverflowError:
                             # issue4215 - we can't cache a run of chunks greater than
                             # 2G on Windows
                             return [self._chunk(rev, df=df) for rev in revschunk]
                         decomp = self.decompress
                         for rev in revschunk:
                             chunkstart = start(rev)
                             if inline:
                                 chunkstart += (rev + 1) * iosize
                             chunklength = length(rev)
                             ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, '')
                 def deltaparent(self, rev):
                     """return deltaparent of the given revision"""
                     base = self.index[rev][3]
                     if base == rev:
                         return nullrev
                     elif self._generaldelta:
                         return base
                     else:
                         return rev - 1
                 def issnapshot(self, rev):
                     """tells whether rev is a snapshot
                     """
                     if rev == nullrev:
                         return True
                     deltap = self.deltaparent(rev)
                     if deltap == nullrev:
                         return True
                     p1, p2 = self.parentrevs(rev)
                     if deltap in (p1, p2):
                         return False
                     return self.issnapshot(deltap)
                 def snapshotdepth(self, rev):
                     """number of snapshot in the chain before this one"""
                     if not self.issnapshot(rev):
                         raise error.ProgrammingError('revision %d not a snapshot')
                     return len(self._deltachain(rev)[0]) - 1
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions
                     The delta calculated is in binary form and is intended to be written to
                     revlog data directly. So this function needs raw revision data.
                     """
                     if rev1 != nullrev and self.deltaparent(rev2) == rev1:
                         return bytes(self._chunk(rev2))
                     return mdiff.textdiff(self.revision(rev1, raw=True),
                                           self.revision(rev2, raw=True))
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """return an uncompressed revision of a given node or revision
                     number.
                     _df - an existing file handle to read from. (internal-only)
                     raw - an optional argument specifying if the revision data is to be
                     treated as raw data when applying flag transforms. 'raw' should be set
                     to True when generating changegroups or in debug commands.
                     """
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         rev = None
                     cachedrev = None
                     flags = None
                     rawtext = None
                     if node == nullid:
                         return ""
                     if self._revisioncache:
                         if self._revisioncache[0] == node:
                             # _cache only stores rawtext
                             if raw:
                                 return self._revisioncache[2]
                             # duplicated, but good for perf
                             if rev is None:
                                 rev = self.rev(node)
                             if flags is None:
                                 flags = self.flags(rev)
                             # no extra flags set, no flag processor runs, text = rawtext
                             if flags == REVIDX_DEFAULT_FLAGS:
                                 return self._revisioncache[2]
                             # rawtext is reusable. need to run flag processor
                             rawtext = self._revisioncache[2]
                         cachedrev = self._revisioncache[1]
                     # look up what we need to read
                     if rawtext is None:
                         if rev is None:
                             rev = self.rev(node)
                         chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                         if stopped:
                             rawtext = self._revisioncache[2]
                         # drop cache to save memory
                         self._revisioncache = None
                         targetsize = None
                         rawsize = self.index[rev][2]
                         if 0 <= rawsize:
                             targetsize = 4 * rawsize
                         bins = self._chunks(chain, df=_df, targetsize=targetsize)
                         if rawtext is None:
                             rawtext = bytes(bins[0])
                             bins = bins[1:]
                         rawtext = mdiff.patches(rawtext, bins)
                         self._revisioncache = (node, rev, rawtext)
                     if flags is None:
                         if rev is None:
                             rev = self.rev(node)
                         flags = self.flags(rev)
                     text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     return text
                 def hash(self, text, p1, p2):
                     """Compute a node hash.
                     Available as a function so that subclasses can replace the hash
                     as needed.
                     """
                     return storageutil.hashrevisionsha1(text, p1, p2)
                 def _processflags(self, text, flags, operation, raw=False):
                     """Inspect revision data flags and applies transforms defined by
                     registered flag processors.
                     ``text`` - the revision data to process
                     ``flags`` - the revision flags
                     ``operation`` - the operation being performed (read or write)
                     ``raw`` - an optional argument describing if the raw transform should be
                     applied.
                     This method processes the flags in the order (or reverse order if
                     ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
                     flag processors registered for present flags. The order of flags defined
                     in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
                     Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                     processed text and ``validatehash`` is a bool indicating whether the
                     returned text should be checked for hash integrity.
                     Note: If the ``raw`` argument is set, it has precedence over the
                     operation and will only update the value of ``validatehash``.
                     """
                     # fast path: no flag processors will run
                     if flags == 0:
                         return text, True
                     if not operation in ('read', 'write'):
                         raise error.ProgrammingError(_("invalid '%s' operation") %
                                                      operation)
                     # Check all flags are known.
                     if flags & ~REVIDX_KNOWN_FLAGS:
                         raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                                 (flags & ~REVIDX_KNOWN_FLAGS))
                     validatehash = True
                     # Depending on the operation (read or write), the order might be
                     # reversed due to non-commutative transforms.
                     orderedflags = REVIDX_FLAGS_ORDER
                     if operation == 'write':
                         orderedflags = reversed(orderedflags)
                     for flag in orderedflags:
                         # If a flagprocessor has been registered for a known flag, apply the
                         # related operation transform and update result tuple.
                         if flag & flags:
                             vhash = True
                             if flag not in self._flagprocessors:
                                 message = _("missing processor for flag '%#x'") % (flag)
                                 raise error.RevlogError(message)
                             processor = self._flagprocessors[flag]
                             if processor is not None:
                                 readtransform, writetransform, rawtransform = processor
                                 if raw:
                                     vhash = rawtransform(self, text)
                                 elif operation == 'read':
                                     text, vhash = readtransform(self, text)
                                 else: # write operation
                                     text, vhash = writetransform(self, text)
                             validatehash = validatehash and vhash
                     return text, validatehash
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Check node hash integrity.
                     Available as a function so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     try:
                         if p1 is None and p2 is None:
                             p1, p2 = self.parents(node)
                         if node != self.hash(text, p1, p2):
                             # Clear the revision cache on hash failure. The revision cache
                             # only stores the raw revision and clearing the cache does have
                             # the side-effect that we won't have a cache hit when the raw
                             # revision data is accessed. But this case should be rare and
                             # it is extra work to teach the cache about the hash
                             # verification state.
                             if self._revisioncache and self._revisioncache[0] == node:
                                 self._revisioncache = None
                             revornode = rev
                             if revornode is None:
                                 revornode = templatefilters.short(hex(node))
                             raise error.RevlogError(_("integrity check failed on %s:%s")
                                 % (self.indexfile, pycompat.bytestr(revornode)))
                     except error.RevlogError:
                         if self._censorable and storageutil.iscensoredtext(text):
                             raise error.CensoredNodeError(self.indexfile, node, text)
                         raise
                 def _enforceinlinesize(self, tr, fp=None):
                     """Check if the revlog is too big for inline and convert if so.
                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.
                     """
                     tiprev = len(self) - 1
                     if (not self._inline or
                         (self.start(tiprev) + self.length(tiprev)) < _maxinline):
                         return
                     trinfo = tr.find(self.indexfile)
                     if trinfo is None:
                         raise error.RevlogError(_("%s not found in the transaction")
                                                 % self.indexfile)
                     trindex = trinfo[2]
                     if trindex is not None:
                         dataoff = self.start(trindex)
                     else:
                         # revlog was stripped at start of transaction, use all leftover data
                         trindex = len(self) - 1
                         dataoff = self.end(tiprev)
                     tr.add(self.datafile, dataoff)
                     if fp:
                         fp.flush()
                         fp.close()
                     with self._datafp('w') as df:
                         for r in self:
                             df.write(self._getsegmentforrevs(r, r)[1])
                     with self._indexfp('w') as fp:
                         self.version &= ~FLAG_INLINE_DATA
                         self._inline = False
                         io = self._io
                         for i in self:
                             e = io.packentry(self.index[i], self.node, self.version, i)
                             fp.write(e)
                         # the temp file replace the real index when we exit the context
                         # manager
                     tr.replace(self.indexfile, trindex * self._io.size)
                     self._chunkclear()
                 def _nodeduplicatecallback(self, transaction, node):
                     """called when trying to add a node already stored.
                     """
                 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                                 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     deltacomputer - an optional deltacomputer instance shared between
                         multiple calls
                     """
                     if link == nullrev:
                         raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                                 % self.indexfile)
                     if flags:
                         node = node or self.hash(text, p1, p2)
                     rawtext, validatehash = self._processflags(text, flags, 'write')
                     # If the flag processor modifies the revision data, ignore any provided
                     # cachedelta.
                     if rawtext != text:
                         cachedelta = None
                     if len(rawtext) > _maxentrysize:
                         raise error.RevlogError(
                             _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                             % (self.indexfile, len(rawtext)))
                     node = node or self.hash(rawtext, p1, p2)
                     if node in self.nodemap:
                         return node
                     if validatehash:
                         self.checkhash(rawtext, node, p1=p1, p2=p2)
                     return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                                flags, cachedelta=cachedelta,
                                                deltacomputer=deltacomputer)
                 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                                    cachedelta=None, deltacomputer=None):
                     """add a raw revision with known flags, node and parents
                     useful when reusing a revision not stored in this revlog (ex: received
                     over wire, or read from an external bundle).
                     """
                     dfh = None
                     if not self._inline:
                         dfh = self._datafp("a+")
                     ifh = self._indexfp("a+")
                     try:
                         return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                                  flags, cachedelta, ifh, dfh,
                                                  deltacomputer=deltacomputer)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data."""
                     if not data:
                         return '', data
                     compressed = self._compressor.compress(data)
                     if compressed:
                         # The revlog compressor added the header in the returned data.
                         return '', compressed
                     if data[0:1] == '\0':
                         return '', data
                     return 'u', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.
                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     """
                     if not data:
                         return data
                     # Revlogs are read much more frequently than they are written and many
                     # chunks only take microseconds to decompress, so performance is
                     # important here.
                     #
                     # We can make a few assumptions about revlogs:
                     #
                     # 1) the majority of chunks will be compressed (as opposed to inline
                     #    raw data).
                     # 2) decompressing *any* data will likely by at least 10x slower than
                     #    returning raw inline data.
                     # 3) we want to prioritize common and officially supported compression
                     #    engines
                     #
                     # It follows that we want to optimize for "decompress compressed data
                     # when encoded with common and officially supported compression engines"
                     # case over "raw data" and "data encoded by less common or non-official
                     # compression engines." That is why we have the inline lookup first
                     # followed by the compengines lookup.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest reads.
                     t = data[0:1]
                     if t == 'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise error.RevlogError(_('revlog decompress error: %s') %
                                                     stringutil.forcebytestr(e))
                     # '\0' is more common than 'u' so it goes first.
                     elif t == '\0':
                         return data
                     elif t == 'u':
                         return util.buffer(data, 1)
                     try:
                         compressor = self._decompressors[t]
                     except KeyError:
                         try:
                             engine = util.compengines.forrevlogheader(t)
                             compressor = engine.revlogcompressor()
                             self._decompressors[t] = compressor
                         except KeyError:
                             raise error.RevlogError(_('unknown compression type %r') % t)
                     return compressor.decompress(data)
                 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                                  cachedelta, ifh, dfh, alwayscache=False,
                                  deltacomputer=None):
                     """internal function to add revisions to the log
                     see addrevision for argument descriptions.
                     note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
                     if "deltacomputer" is not provided or None, a defaultdeltacomputer will
                     be used.
                     invariants:
                     - rawtext is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     """
                     if node == nullid:
                         raise error.RevlogError(_("%s: attempt to add null revision") %
                                                 self.indexfile)
                     if node == wdirid or node in wdirfilenodeids:
                         raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                                 self.indexfile)
                     if self._inline:
                         fh = ifh
                     else:
                         fh = dfh
                     btext = [rawtext]
                     curr = len(self)
                     prev = curr - 1
                     offset = self.end(prev)
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if rawtext is None:
                         # need rawtext size, before changed by flag processors, which is
                         # the non-raw size. use revlog explicitly to avoid filelog's extra
                         # logic that might remove metadata size.
                         textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                                     cachedelta[1])
                     else:
                         textlen = len(rawtext)
                     if deltacomputer is None:
                         deltacomputer = deltautil.deltacomputer(self)
                     revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
                     deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
                     e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
                          deltainfo.base, link, p1r, p2r, node)
                     self.index.append(e)
                     self.nodemap[node] = curr
                     entry = self._io.packentry(e, self.node, self.version, curr)
                     self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                                      link, offset)
                     rawtext = btext[0]
                     if alwayscache and rawtext is None:
                         rawtext = deltacomputer.buildtext(revinfo, fh)
                     if type(rawtext) == bytes: # only accept immutable objects
                         self._revisioncache = (node, curr, rawtext)
                     self._chainbasecache[curr] = deltainfo.chainbase
                     return node
                 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3.
                     ifh.seek(0, os.SEEK_END)
                     if dfh:
                         dfh.seek(0, os.SEEK_END)
                     curr = len(self) - 1
                     if not self._inline:
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         ifh.write(entry)
                     else:
                         offset += curr * self._io.size
                         transaction.add(self.indexfile, offset, curr)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         self._enforceinlinesize(transaction, ifh)
                 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     If ``addrevisioncb`` is defined, it will be called with arguments of
                     this revlog and the node that was added.
                     """
                     nodes = []
                     r = len(self)
                     end = 0
                     if r:
                         end = self.end(r - 1)
                     ifh = self._indexfp("a+")
                     isize = r * self._io.size
                     if self._inline:
                         transaction.add(self.indexfile, end + isize, r)
                         dfh = None
                     else:
                         transaction.add(self.indexfile, isize, r)
                         transaction.add(self.datafile, end)
                         dfh = self._datafp("a+")
                     def flush():
                         if dfh:
                             dfh.flush()
                         ifh.flush()
                     try:
                         deltacomputer = deltautil.deltacomputer(self)
                         # loop through our set of deltas
                         for data in deltas:
                             node, p1, p2, linknode, deltabase, delta, flags = data
                             link = linkmapper(linknode)
                             flags = flags or REVIDX_DEFAULT_FLAGS
                             nodes.append(node)
                             if node in self.nodemap:
                                 self._nodeduplicatecallback(transaction, node)
                                 # this can happen if two branches make the same change
                                 continue
                             for p in (p1, p2):
                                 if p not in self.nodemap:
                                     raise error.LookupError(p, self.indexfile,
                                                             _('unknown parent'))
                             if deltabase not in self.nodemap:
                                 raise error.LookupError(deltabase, self.indexfile,
                                                         _('unknown delta base'))
                             baserev = self.rev(deltabase)
                             if baserev != nullrev and self.iscensored(baserev):
                                 # if base is censored, delta must be full replacement in a
                                 # single patch operation
                                 hlen = struct.calcsize(">lll")
                                 oldlen = self.rawsize(baserev)
                                 newlen = len(delta) - hlen
                                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                                     raise error.CensoredBaseError(self.indexfile,
                                                                   self.node(baserev))
                             if not flags and self._peek_iscensored(baserev, delta, flush):
                                 flags |= REVIDX_ISCENSORED
                             # We assume consumers of addrevisioncb will want to retrieve
                             # the added revision, which will require a call to
                             # revision(). revision() will fast path if there is a cache
                             # hit. So, we tell _addrevision() to always cache in this case.
                             # We're only using addgroup() in the context of changegroup
                             # generation so the revision data can always be handled as raw
                             # by the flagprocessor.
                             self._addrevision(node, None, transaction, link,
                                               p1, p2, flags, (baserev, delta),
                                               ifh, dfh,
                                               alwayscache=bool(addrevisioncb),
                                               deltacomputer=deltacomputer)
                             if addrevisioncb:
                                 addrevisioncb(self, node)
                             if not dfh and not self._inline:
                                 # addrevision switched from inline to conventional
                                 # reopen the index
                                 ifh.close()
                                 dfh = self._datafp("a+")
                                 ifh = self._indexfp("a+")
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                     return nodes
                 def iscensored(self, rev):
                     """Check if a file revision is censored."""
                     if not self._censorable:
                         return False
                     return self.flags(rev) & REVIDX_ISCENSORED
                 def _peek_iscensored(self, baserev, delta, flush):
                     """Quickly check if a delta produces a censored revision."""
                     if not self._censorable:
                         return False
                     # Fragile heuristic: unless new file meta keys are added alphabetically
                     # preceding "censored", all censored revisions are prefixed by
                     # "\1\ncensored:". A delta producing such a censored revision must be a
                     # full-replacement delta, so we inspect the first and only patch in the
                     # delta for this prefix.
                     hlen = struct.calcsize(">lll")
                     if len(delta) <= hlen:
                         return False
                     oldlen = self.rawsize(baserev)
                     newlen = len(delta) - hlen
                     if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                         return False
                     add = "\1\ncensored:"
                     addlen = len(add)
                     return newlen >= addlen and delta[hlen:hlen + addlen] == add
                 def getstrippoint(self, minlink):
                     """find the minimum rev that must be stripped to strip the linkrev
                     Returns a tuple containing the minimum rev and a set of all revs that
                     have linkrevs that will be broken by this strip.
                     """
                     return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                                         self.headrevs(),
                                                         self.linkrev, self.parentrevs)
                 def strip(self, minlink, transaction):
                     """truncate the revlog on the first revision with a linkrev >= minlink
                     This function is called when we're stripping revision minlink and
                     its descendants from the repository.
                     We have to remove all revisions with linkrev >= minlink, because
                     the equivalent changelog revisions will be renumbered after the
                     strip.
                     So we truncate the revlog on the first of these revisions, and
                     trust that the caller has saved the revisions that shouldn't be
                     removed and that it'll re-add them after this truncation.
                     """
                     if len(self) == 0:
                         return
                     rev, _ = self.getstrippoint(minlink)
                     if rev == len(self):
                         return
                     # first truncate the files on disk
                     end = self.start(rev)
                     if not self._inline:
                         transaction.add(self.datafile, end)
                         end = rev * self._io.size
                     else:
                         end += rev * self._io.size
                     transaction.add(self.indexfile, end)
                     # then reset internal state in memory to forget those revisions
                     self._revisioncache = None
                     self._chaininfocache = {}
                     self._chunkclear()
                     for x in pycompat.xrange(rev, len(self)):
                         del self.nodemap[self.node(x)]
                     del self.index[rev:-1]
                     self._nodepos = None
                 def checksize(self):
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         with self._datafp() as f:
                             f.seek(0, 2)
                             actual = f.tell()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         s = self._io.size
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             databytes = 0
                             for r in self:
                                 databytes += max(0, self.length(r))
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)
                 def files(self):
                     res = [self.indexfile]
                     if not self._inline:
                         res.append(self.datafile)
                     return res
                 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                                   assumehaveparentrevisions=False, deltaprevious=False):
                     if nodesorder not in ('nodes', 'storage', None):
                         raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                                      nodesorder)
                     if nodesorder is None and not self._generaldelta:
                         nodesorder = 'storage'
                     return storageutil.emitrevisions(
                         self, nodes, nodesorder, revlogrevisiondelta,
                         deltaparentfn=self.deltaparent,
                         candeltafn=self.candelta,
                         rawsizefn=self.rawsize,
                         revdifffn=self.revdiff,
                         flagsfn=self.flags,
                         sendfulltext=not self._storedeltachains,
                         revisiondata=revisiondata,
                         assumehaveparentrevisions=assumehaveparentrevisions,
                         deltaprevious=deltaprevious)
                 DELTAREUSEALWAYS = 'always'
                 DELTAREUSESAMEREVS = 'samerevs'
                 DELTAREUSENEVER = 'never'
                 DELTAREUSEFULLADD = 'fulladd'
                 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
                 def clone(self, tr, destrevlog, addrevisioncb=None,
                           deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``deltabothparents`` argument
                     controls whether to compute deltas against both parents for merges.
                     By default, the current default is used.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
                     if len(destrevlog):
                         raise ValueError(_('destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_('source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_('destination revlog has filtered revisions'))
                     # lazydeltabase controls whether to reuse a cached delta, if possible.
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._deltabothparents
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                         destrevlog._deltabothparents = deltabothparents or oldamd
                         populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                             self.DELTAREUSESAMEREVS)
                         deltacomputer = deltautil.deltacomputer(destrevlog)
                         index = self.index
                         for rev in self:
                             entry = index[rev]
                             # Some classes override linkrev to take filtered revs into
                             # account. Use raw entry from index.
                             flags = entry[0] & 0xffff
                             linkrev = entry[4]
                             p1 = index[entry[5]][7]
                             p2 = index[entry[6]][7]
                             node = entry[7]
                             # (Possibly) reuse the delta from the revlog if allowed and
                             # the revlog chunk is a delta.
                             cachedelta = None
                             rawtext = None
                             if populatecachedelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, bytes(self._chunk(rev)))
                             if not cachedelta:
                                 rawtext = self.revision(rev, raw=True)
                             if deltareuse == self.DELTAREUSEFULLADD:
                                 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                                        cachedelta=cachedelta,
                                                        node=node, flags=flags,
                                                        deltacomputer=deltacomputer)
                             else:
                                 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                                         checkambig=False)
                                 dfh = None
                                 if not destrevlog._inline:
                                     dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                                 try:
                                     destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                             p2, flags, cachedelta, ifh, dfh,
                                                             deltacomputer=deltacomputer)
                                 finally:
                                     if dfh:
                                         dfh.close()
                                     ifh.close()
                             if addrevisioncb:
                                 addrevisioncb(self, rev, node)
                     finally:
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._deltabothparents = oldamd
                 def censorrevision(self, node, tombstone=b''):
                     if (self.version & 0xFFFF) == REVLOGV0:
                         raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                                 self.version)
                     rev = self.rev(node)
                     tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
                     if len(tombstone) > self.rawsize(rev):
                         raise error.Abort(_('censor tombstone must be no longer than '
                                             'censored data'))
                     # Using two files instead of one makes it easy to rewrite entry-by-entry
                     idxread = self.opener(self.indexfile, 'r')
                     idxwrite = self.opener(self.indexfile, 'wb', atomictemp=True)
                     if self.version & FLAG_INLINE_DATA:
                         dataread, datawrite = idxread, idxwrite
                     else:
                         dataread = self.opener(self.datafile, 'r')
                         datawrite = self.opener(self.datafile, 'wb', atomictemp=True)
                     # Copy all revlog data up to the entry to be censored.
                     offset = self.start(rev)
                     for chunk in util.filechunkiter(idxread, limit=rev * self._io.size):
                         idxwrite.write(chunk)
                     for chunk in util.filechunkiter(dataread, limit=offset):
                         datawrite.write(chunk)
                     def rewriteindex(r, newoffs, newdata=None):
                         """Rewrite the index entry with a new data offset and new data.
                         The newdata argument, if given, is a tuple of three positive
                         integers: (new compressed, new uncompressed, added flag bits).
                         """
                         offlags, comp, uncomp, base, link, p1, p2, nodeid = self.index[r]
                         flags = gettype(offlags)
                         if newdata:
                             comp, uncomp, nflags = newdata
                             flags |= nflags
                         offlags = offset_type(newoffs, flags)
                         e = (offlags, comp, uncomp, r, link, p1, p2, nodeid)
                         idxwrite.write(self._io.packentry(e, None, self.version, r))
                         idxread.seek(self._io.size, 1)
                     def rewrite(r, offs, data, nflags=REVIDX_DEFAULT_FLAGS):
                         """Write the given fulltext with the given data offset.
                         Returns:
                             The integer number of data bytes written, for tracking data
                             offsets.
                         """
                         flag, compdata = self.compress(data)
                         newcomp = len(flag) + len(compdata)
                         rewriteindex(r, offs, (newcomp, len(data), nflags))
                         datawrite.write(flag)
                         datawrite.write(compdata)
                         dataread.seek(self.length(r), 1)
                         return newcomp
                     # Rewrite censored entry with (padded) tombstone data.
                     pad = ' ' * (self.rawsize(rev) - len(tombstone))
                     offset += rewrite(rev, offset, tombstone + pad, REVIDX_ISCENSORED)
                     # Rewrite all following filelog revisions fixing up offsets and deltas.
                     for srev in pycompat.xrange(rev + 1, len(self)):
                         if rev in self.parentrevs(srev):
                             # Immediate children of censored node must be re-added as
                             # fulltext.
                             try:
                                 revdata = self.revision(srev)
                             except error.CensoredNodeError as e:
                                 revdata = e.tombstone
                             dlen = rewrite(srev, offset, revdata)
                         else:
                             # Copy any other revision data verbatim after fixing up the
                             # offset.
                             rewriteindex(srev, offset)
                             dlen = self.length(srev)
                             for chunk in util.filechunkiter(dataread, limit=dlen):
                                 datawrite.write(chunk)
                         offset += dlen
                     idxread.close()
                     idxwrite.close()
                     if dataread is not idxread:
                         dataread.close()
                         datawrite.close()
                 def verifyintegrity(self, state):
                     """Verifies the integrity of the revlog.
                     Yields ``revlogproblem`` instances describing problems that are
                     found.
                     """
                     dd, di = self.checksize()
                     if dd:
                         yield revlogproblem(error=_('data length off by %d bytes') % dd)
                     if di:
                         yield revlogproblem(error=_('index contains %d extra bytes') % di)
                     version = self.version & 0xFFFF
                     # The verifier tells us what version revlog we should be.
                     if version != state['expectedversion']:
                         yield revlogproblem(
                             warning=_("warning: '%s' uses revlog format %d; expected %d") %
                                     (self.indexfile, version, state['expectedversion']))
                     state['skipread'] = set()
                     for rev in self:
                         node = self.node(rev)
                         # Verify contents. 4 cases to care about:
                         #
                         #   common: the most common case
                         #   rename: with a rename
                         #   meta: file content starts with b'\1\n', the metadata
                         #         header defined in filelog.py, but without a rename
                         #   ext: content stored externally
                         #
                         # More formally, their differences are shown below:
                         #
                         #                       | common | rename | meta  | ext
                         #  -------------------------------------------------------
                         #   flags()             | 0      | 0      | 0     | not 0
                         #   renamed()           | False  | True   | False | ?
                         #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
                         #
                         # "rawtext" means the raw text stored in revlog data, which
                         # could be retrieved by "revision(rev, raw=True)". "text"
                         # mentioned below is "revision(rev, raw=False)".
                         #
                         # There are 3 different lengths stored physically:
                         #  1. L1: rawsize, stored in revlog index
                         #  2. L2: len(rawtext), stored in revlog data
                         #  3. L3: len(text), stored in revlog data if flags==0, or
                         #     possibly somewhere else if flags!=0
                         #
                         # L1 should be equal to L2. L3 could be different from them.
                         # "text" may or may not affect commit hash depending on flag
                         # processors (see revlog.addflagprocessor).
                         #
                         #              | common  | rename | meta  | ext
                         # -------------------------------------------------
                         #    rawsize() | L1      | L1     | L1    | L1
                         #       size() | L1      | L2-LM  | L1(*) | L1 (?)
                         # len(rawtext) | L2      | L2     | L2    | L2
                         #    len(text) | L2      | L2     | L2    | L3
                         #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
                         #
                         # LM:  length of metadata, depending on rawtext
                         # (*): not ideal, see comment in filelog.size
                         # (?): could be "- len(meta)" if the resolved content has
                         #      rename metadata
                         #
                         # Checks needed to be done:
                         #  1. length check: L1 == L2, in all cases.
                         #  2. hash check: depending on flag processor, we may need to
                         #     use either "text" (external), or "rawtext" (in revlog).
                         try:
                             skipflags = state.get('skipflags', 0)
                             if skipflags:
                                 skipflags &= self.flags(rev)
                             if skipflags:
                                 state['skipread'].add(node)
                             else:
                                 # Side-effect: read content and verify hash.
                                 self.revision(node)
                             l1 = self.rawsize(rev)
                             l2 = len(self.revision(node, raw=True))
                             if l1 != l2:
                                 yield revlogproblem(
                                     error=_('unpacked size is %d, %d expected') % (l2, l1),
                                     node=node)
                         except error.CensoredNodeError:
                             if state['erroroncensored']:
                                 yield revlogproblem(error=_('censored file data'),
                                                     node=node)
                                 state['skipread'].add(node)
                         except Exception as e:
                             yield revlogproblem(
                                 error=_('unpacking %s: %s') % (short(node),
                                                                stringutil.forcebytestr(e)),
                                 node=node)
                             state['skipread'].add(node)
                 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                                 revisionscount=False, trackedsize=False,
                                 storedsize=False):
                     d = {}
                     if exclusivefiles:
                         d['exclusivefiles'] = [(self.opener, self.indexfile)]
                         if not self._inline:
                             d['exclusivefiles'].append((self.opener, self.datafile))
                     if sharedfiles:
                         d['sharedfiles'] = []
                     if revisionscount:
                         d['revisionscount'] = len(self)
                     if trackedsize:
                         d['trackedsize'] = sum(map(self.rawsize, iter(self)))
                     if storedsize:
                         d['storedsize'] = sum(self.opener.stat(path).st_size
                                               for path in self.files())
                     return d