sparse-read: ignore trailing empty revs in each read chunk...
Paul Morelle
r34899:1bde8e8e default
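
This change drops trailing empty revisions from each chunk yielded by the sparse-read slicing code, so a read never ends on revisions that contribute no data. The new `_trimchunk` helper does the trimming, and `_slicechunk` additionally skips empty revisions when measuring gaps, letting adjacent holes merge into larger ones. As a rough illustration of the trimming behaviour, here is a self-contained sketch pairing the `_trimchunk` code from this change with a hypothetical `fakerevlog` stand-in (not part of the change; only the `length` accessor matters for trimming):

# Sketch only. 'fakerevlog' is a hypothetical stand-in for a real revlog;
# _trimchunk needs nothing but its 'length' accessor, which reports the
# on-disk (compressed) size of each revision's delta.
class fakerevlog(object):
    def __init__(self, lengths):
        self._lengths = lengths

    def length(self, rev):
        return self._lengths[rev]

# _trimchunk as introduced by this change.
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs"""
    length = revlog.length
    if endidx is None:
        endidx = len(revs)
    # Trim empty revs at the end, but never the very first revision of a chain
    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
        endidx -= 1
    return revs[startidx:endidx]

rl = fakerevlog([10, 0, 7, 0, 0])
print(_trimchunk(rl, [0, 1, 2, 3, 4], 0))  # [0, 1, 2]: empty revs 3 and 4 trimmed
print(_trimchunk(rl, [3, 4], 0))           # [3]: the first rev of a chunk is kept

Note how the second call keeps rev 3 even though its delta is empty: trimming never removes the first revision of a chunk, which would otherwise produce an empty read.
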
@@ -1,2350 +1,2375 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 from __future__ import absolute_import
 
 import binascii
 import collections
 import errno
 import hashlib
 import heapq
 import os
 import struct
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullid,
     nullrev,
     wdirhex,
     wdirid,
     wdirrev,
 )
 from .i18n import _
 from . import (
     ancestor,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 
 parsers = policy.importmod(r'parsers')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
 # Dummy value until file format is finalized.
 # Reminder: change the bounds check in revlog.__init__ when this is changed.
 REVLOGV2 = 0xDEAD
 FLAG_INLINE_DATA = (1 << 16)
 FLAG_GENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
 REVLOG_DEFAULT_FORMAT = REVLOGV1
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
 REVLOGV2_FLAGS = REVLOGV1_FLAGS
 
 # revlog index flags
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
 REVIDX_DEFAULT_FLAGS = 0
 # stable order in which flags need to be processed and their processors applied
 REVIDX_FLAGS_ORDER = [
     REVIDX_ISCENSORED,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
 ]
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 
 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576
 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
 ProgrammingError = error.ProgrammingError
 
 # Store flag processors (cf. 'addflagprocessor()' to register)
 _flagprocessors = {
     REVIDX_ISCENSORED: None,
 }
 
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.
 
     Invariant:
     - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
     - Only one flag processor can be registered on a specific flag.
     - flagprocessors must be 3-tuples of functions (read, write, raw) with the
       following signatures:
           - (read) f(self, rawtext) -> text, bool
           - (write) f(self, text) -> rawtext, bool
           - (raw) f(self, rawtext) -> bool
     "text" is presented to the user. "rawtext" is stored in revlog data, not
     directly visible to the user.
     The boolean returned by these transforms is used to determine whether
     the returned text can be used for hash integrity checking. For example,
     if "write" returns False, then "text" is used to generate hash. If
     "write" returns True, that basically means "rawtext" returned by "write"
     should be used to generate hash. Usually, "write" and "read" return
     different booleans. And "raw" returns a same boolean as "write".
 
     Note: The 'raw' transform is used for changegroup generation and in some
     debug commands. In this case the transform only indicates whether the
     contents can be used for hash integrity checks.
     """
     if not flag & REVIDX_KNOWN_FLAGS:
         msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
         raise ProgrammingError(msg)
     if flag not in REVIDX_FLAGS_ORDER:
         msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
         raise ProgrammingError(msg)
     if flag in _flagprocessors:
         msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
         raise error.Abort(msg)
     _flagprocessors[flag] = processor
 
 def getoffset(q):
     return int(q >> 16)
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     if (type & ~REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError('unknown revlog index flags')
     return int(int(offset) << 16 | type)
 
 _nullhash = hashlib.sha1(nullid)
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = _nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         if p1 < p2:
             a = p1
             b = p2
         else:
             a = p2
             b = p1
         s = hashlib.sha1(a)
         s.update(b)
     s.update(text)
     return s.digest()
 
+def _trimchunk(revlog, revs, startidx, endidx=None):
+    """returns revs[startidx:endidx] without empty trailing revs
+    """
+    length = revlog.length
+
+    if endidx is None:
+        endidx = len(revs)
+
+    # Trim empty revs at the end, but never the very first revision of a chain
+    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
+        endidx -= 1
+
+    return revs[startidx:endidx]
+
 def _slicechunk(revlog, revs):
     """slice revs to reduce the amount of unrelated data to be read from disk.
 
     ``revs`` is sliced into groups that should be read in one time.
     Assume that revs are sorted.
     """
     start = revlog.start
     length = revlog.length
 
     if len(revs) <= 1:
         yield revs
         return
 
     startbyte = start(revs[0])
     endbyte = start(revs[-1]) + length(revs[-1])
     readdata = deltachainspan = endbyte - startbyte
 
     chainpayload = sum(length(r) for r in revs)
 
     if deltachainspan:
         density = chainpayload / float(deltachainspan)
     else:
         density = 1.0
 
     # Store the gaps in a heap to have them sorted by decreasing size
     gapsheap = []
     heapq.heapify(gapsheap)
     prevend = None
     for i, rev in enumerate(revs):
         revstart = start(rev)
         revlen = length(rev)
 
+        # Skip empty revisions to form larger holes
+        if revlen == 0:
+            continue
+
         if prevend is not None:
             gapsize = revstart - prevend
             # only consider holes that are large enough
             if gapsize > revlog._srmingapsize:
                 heapq.heappush(gapsheap, (-gapsize, i))
 
         prevend = revstart + revlen
 
     # Collect the indices of the largest holes until the density is acceptable
     indicesheap = []
     heapq.heapify(indicesheap)
     while gapsheap and density < revlog._srdensitythreshold:
         oppgapsize, gapidx = heapq.heappop(gapsheap)
 
         heapq.heappush(indicesheap, gapidx)
 
         # the gap sizes are stored as negatives to be sorted decreasingly
         # by the heap
         readdata -= (-oppgapsize)
         if readdata > 0:
             density = chainpayload / float(readdata)
         else:
             density = 1.0
 
     # Cut the revs at collected indices
     previdx = 0
     while indicesheap:
         idx = heapq.heappop(indicesheap)
-        yield revs[previdx:idx]
+
+        chunk = _trimchunk(revlog, revs, previdx, idx)
+        if chunk:
+            yield chunk
+
         previdx = idx
-    yield revs[previdx:]
+
+    chunk = _trimchunk(revlog, revs, previdx)
+    if chunk:
+        yield chunk
 
 # index v0:
 #  4 bytes: offset
 #  4 bytes: compressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 # 20 bytes: parent 1 nodeid
 # 20 bytes: parent 2 nodeid
 # 20 bytes: nodeid
 indexformatv0 = struct.Struct(">4l20s20s20s")
 indexformatv0_pack = indexformatv0.pack
 indexformatv0_unpack = indexformatv0.unpack
 
 class revlogoldio(object):
     def __init__(self):
         self.size = indexformatv0.size
 
     def parseindex(self, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = indexformatv0_unpack(cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         # add the magic null revision at -1
         index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
         return index, nodemap, None
 
     def packentry(self, entry, node, version, rev):
         if gettype(entry[0]):
             raise RevlogError(_('index entry flags need revlog version 1'))
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return indexformatv0_pack(*e2)
 
 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = struct.Struct(">Qiiiiii20s12x")
 indexformatng_pack = indexformatng.pack
 versionformat = struct.Struct(">I")
 versionformat_pack = versionformat.pack
 versionformat_unpack = versionformat.unpack
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7fffffff
 
 class revlogio(object):
     def __init__(self):
         self.size = indexformatng.size
 
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
         return index, getattr(index, 'nodemap', None), cache
 
     def packentry(self, entry, node, version, rev):
         p = indexformatng_pack(*entry)
         if rev == 0:
             p = versionformat_pack(version) + p[4:]
         return p
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                  mmaplargeindex=False):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile or (indexfile[:-2] + ".d")
         self.opener = opener
         # When True, indexfile is opened with checkambig=True at writing, to
         # avoid file stat ambiguity.
         self._checkambig = checkambig
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, '')
         # How much data to read and cache into the raw revlog data cache.
         self._chunkcachesize = 65536
         self._maxchainlen = None
         self._aggressivemergedeltas = False
         self.index = []
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
         # Mapping of revision integer to full node.
         self._nodecache = {nullid: nullrev}
         self._nodepos = None
         self._compengine = 'zlib'
         self._maxdeltachainspan = -1
         self._withsparseread = False
         self._srdensitythreshold = 0.25
         self._srmingapsize = 262144
 
         mmapindexthreshold = None
         v = REVLOG_DEFAULT_VERSION
         opts = getattr(opener, 'options', None)
         if opts is not None:
             if 'revlogv2' in opts:
                 # version 2 revlogs always use generaldelta.
                 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
             elif 'revlogv1' in opts:
                 if 'generaldelta' in opts:
                     v |= FLAG_GENERALDELTA
             else:
                 v = 0
             if 'chunkcachesize' in opts:
                 self._chunkcachesize = opts['chunkcachesize']
             if 'maxchainlen' in opts:
                 self._maxchainlen = opts['maxchainlen']
             if 'aggressivemergedeltas' in opts:
                 self._aggressivemergedeltas = opts['aggressivemergedeltas']
             self._lazydeltabase = bool(opts.get('lazydeltabase', False))
             if 'compengine' in opts:
                 self._compengine = opts['compengine']
             if 'maxdeltachainspan' in opts:
                 self._maxdeltachainspan = opts['maxdeltachainspan']
             if mmaplargeindex and 'mmapindexthreshold' in opts:
                 mmapindexthreshold = opts['mmapindexthreshold']
             self._withsparseread = bool(opts.get('with-sparse-read', False))
             if 'sparse-read-density-threshold' in opts:
                 self._srdensitythreshold = opts['sparse-read-density-threshold']
             if 'sparse-read-min-gap-size' in opts:
                 self._srmingapsize = opts['sparse-read-min-gap-size']
 
         if self._chunkcachesize <= 0:
             raise RevlogError(_('revlog chunk cache size %r is not greater '
                                 'than 0') % self._chunkcachesize)
         elif self._chunkcachesize & (self._chunkcachesize - 1):
             raise RevlogError(_('revlog chunk cache size %r is not a power '
                                 'of 2') % self._chunkcachesize)
 
         indexdata = ''
         self._initempty = True
         try:
             f = self.opener(self.indexfile)
             if (mmapindexthreshold is not None and
                     self.opener.fstat(f).st_size >= mmapindexthreshold):
                 indexdata = util.buffer(util.mmapread(f))
             else:
                 indexdata = f.read()
             f.close()
             if len(indexdata) > 0:
                 v = versionformat_unpack(indexdata[:4])[0]
                 self._initempty = False
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
 
         self.version = v
         self._inline = v & FLAG_INLINE_DATA
         self._generaldelta = v & FLAG_GENERALDELTA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0:
             if flags:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         elif fmt == REVLOGV1:
             if flags & ~REVLOGV1_FLAGS:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         elif fmt == REVLOGV2:
             if flags & ~REVLOGV2_FLAGS:
                 raise RevlogError(_('unknown flags (%#04x) in version %d '
                                     'revlog %s') %
                                   (flags >> 16, fmt, self.indexfile))
         else:
             raise RevlogError(_('unknown version (%d) in revlog %s') %
                               (fmt, self.indexfile))
 
         self.storedeltachains = True
 
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         try:
             d = self._io.parseindex(indexdata, self._inline)
         except (ValueError, IndexError):
             raise RevlogError(_("index %s is corrupted") % (self.indexfile))
         self.index, nodemap, self._chunkcache = d
         if nodemap is not None:
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
             self._chunkclear()
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}
         # revlog header -> revlog compressor
         self._decompressors = {}
 
     @util.propertycache
     def _compressor(self):
         return util.compengines[self._compengine].revlogcompressor()
 
     def tip(self):
         return self.node(len(self.index) - 2)
     def __contains__(self, rev):
         return 0 <= rev < len(self)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         return iter(xrange(len(self)))
     def revs(self, start=0, stop=None):
         """iterate over all rev in this revlog (from start to stop)"""
         step = 1
         if stop is not None:
             if start > stop:
                 step = -1
             stop += step
         else:
             stop = len(self)
         return xrange(start, stop, step)
 
     @util.propertycache
     def nodemap(self):
         self.rev(self.node(0))
         return self._nodecache
 
     def hasnode(self, node):
         try:
             self.rev(node)
             return True
         except KeyError:
             return False
 
     def clearcaches(self):
         self._cache = None
         self._chainbasecache.clear()
         self._chunkcache = (0, '')
         self._pcache = {}
 
         try:
             self._nodecache.clearcaches()
         except AttributeError:
             self._nodecache = {nullid: nullrev}
             self._nodepos = None
 
     def rev(self, node):
         try:
             return self._nodecache[node]
         except TypeError:
             raise
         except RevlogError:
             # parsers.c radix tree lookup failed
             if node == wdirid:
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))
         except KeyError:
             # pure python cache lookup failed
             n = self._nodecache
             i = self.index
             p = self._nodepos
             if p is None:
                 p = len(i) - 2
             for r in xrange(p, -1, -1):
                 v = i[r][7]
                 n[v] = r
                 if v == node:
                     self._nodepos = r - 1
                     return r
             if node == wdirid:
                 raise error.WdirUnsupported
             raise LookupError(node, self.indexfile, _('no node'))
 
     # Accessors for index entries.
 
     # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
     # are flags.
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
 
     def flags(self, rev):
         return self.index[rev][0] & 0xFFFF
 
     def length(self, rev):
         return self.index[rev][1]
 
     def rawsize(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l
 
         t = self.revision(rev, raw=True)
         return len(t)
 
     def size(self, rev):
         """length of non-raw text (processed by a "read" flag processor)"""
         # fast path: if no "read" flag processor could change the content,
         # size is rawsize. note: ELLIPSIS is known to not change the content.
         flags = self.flags(rev)
         if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
             return self.rawsize(rev)
 
         return len(self.revision(rev, raw=False))
 
     def chainbase(self, rev):
         base = self._chainbasecache.get(rev)
         if base is not None:
             return base
 
         index = self.index
         base = index[rev][3]
         while base != rev:
             rev = base
             base = index[rev][3]
 
         self._chainbasecache[rev] = base
         return base
 
     def linkrev(self, rev):
         return self.index[rev][4]
 
     def parentrevs(self, rev):
         try:
             return self.index[rev][5:7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise
 
     def node(self, rev):
         try:
             return self.index[rev][7]
         except IndexError:
             if rev == wdirrev:
                 raise error.WdirUnsupported
             raise
 
     # Derived from index values.
 
     def end(self, rev):
         return self.start(rev) + self.length(rev)
 
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
 
     def chainlen(self, rev):
         return self._chaininfo(rev)[0]
 
     def _chaininfo(self, rev):
         chaininfocache = self._chaininfocache
         if rev in chaininfocache:
             return chaininfocache[rev]
         index = self.index
         generaldelta = self._generaldelta
         iterrev = rev
         e = index[iterrev]
         clen = 0
         compresseddeltalen = 0
         while iterrev != e[3]:
             clen += 1
             compresseddeltalen += e[1]
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             if iterrev in chaininfocache:
                 t = chaininfocache[iterrev]
                 clen += t[0]
                 compresseddeltalen += t[1]
                 break
             e = index[iterrev]
         else:
             # Add text length of base since decompressing that also takes
             # work. For cache hits the length is already included.
             compresseddeltalen += e[1]
         r = (clen, compresseddeltalen)
         chaininfocache[rev] = r
         return r
 
     def _deltachain(self, rev, stoprev=None):
         """Obtain the delta chain for a revision.
 
         ``stoprev`` specifies a revision to stop at. If not specified, we
         stop at the base of the chain.
 
         Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
         revs in ascending order and ``stopped`` is a bool indicating whether
         ``stoprev`` was hit.
         """
         # Try C implementation.
         try:
             return self.index.deltachain(rev, stoprev, self._generaldelta)
         except AttributeError:
             pass
 
         chain = []
 
         # Alias to prevent attribute lookup in tight loop.
         index = self.index
         generaldelta = self._generaldelta
 
         iterrev = rev
         e = index[iterrev]
         while iterrev != e[3] and iterrev != stoprev:
             chain.append(iterrev)
             if generaldelta:
                 iterrev = e[3]
             else:
                 iterrev -= 1
             e = index[iterrev]
 
         if iterrev == stoprev:
             stopped = True
         else:
             chain.append(iterrev)
             stopped = False
 
         chain.reverse()
         return chain, stopped
 
     def ancestors(self, revs, stoprev=0, inclusive=False):
         """Generate the ancestors of 'revs' in reverse topological order.
         Does not generate revs lower than stoprev.
 
         See the documentation for ancestor.lazyancestors for more details."""
 
         return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                       inclusive=inclusive)
 
     def descendants(self, revs):
         """Generate the descendants of 'revs' in revision order.
 
         Yield a sequence of revision numbers starting with a child of
         some rev in revs, i.e., each revision is *not* considered a
         descendant of itself. Results are ordered by revision number (a
         topological sort)."""
         first = min(revs)
         if first == nullrev:
             for i in self:
                 yield i
             return
 
         seen = set(revs)
         for i in self.revs(start=first + 1):
             for x in self.parentrevs(i):
                 if x != nullrev and x in seen:
                     seen.add(i)
                     yield i
                     break
 
     def findcommonmissing(self, common=None, heads=None):
         """Return a tuple of the ancestors of common and the ancestors of heads
         that are not ancestors of common. In revset terminology, we return the
         tuple:
 
           ::common, (::heads) - (::common)
 
         The list is sorted by revision number, meaning it is
         topologically sorted.
 
         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()
 
         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]
 
         # we want the ancestors, but inclusive
         class lazyset(object):
             def __init__(self, lazyvalues):
                 self.addedvalues = set()
                 self.lazyvalues = lazyvalues
 
             def __contains__(self, value):
                 return value in self.addedvalues or value in self.lazyvalues
 
             def __iter__(self):
                 added = self.addedvalues
                 for r in added:
                     yield r
                 for r in self.lazyvalues:
                     if not r in added:
                         yield r
 
             def add(self, value):
                 self.addedvalues.add(value)
 
             def update(self, values):
                 self.addedvalues.update(values)
 
         has = lazyset(self.ancestors(common))
         has.add(nullrev)
         has.update(common)
 
         # take all ancestors from heads that aren't in has
         missing = set()
         visit = collections.deque(r for r in heads if r not in has)
         while visit:
             r = visit.popleft()
             if r in missing:
                 continue
             else:
                 missing.add(r)
                 for p in self.parentrevs(r):
                     if p not in has:
                         visit.append(p)
         missing = list(missing)
         missing.sort()
         return has, [self.node(miss) for miss in missing]
 
     def incrementalmissingrevs(self, common=None):
         """Return an object that can be used to incrementally compute the
         revision numbers of the ancestors of arbitrary sets that are not
         ancestors of common. This is an ancestor.incrementalmissingancestors
         object.
 
         'common' is a list of revision numbers. If common is not supplied, uses
         nullrev.
         """
         if common is None:
             common = [nullrev]
 
         return ancestor.incrementalmissingancestors(self.parentrevs, common)
 
     def findmissingrevs(self, common=None, heads=None):
         """Return the revision numbers of the ancestors of heads that
         are not ancestors of common.
 
         More specifically, return a list of revision numbers corresponding to
         nodes N such that every N satisfies the following constraints:
 
           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'
 
         The list is sorted by revision number, meaning it is
         topologically sorted.
 
         'heads' and 'common' are both lists of revision numbers. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullrev]
         if heads is None:
             heads = self.headrevs()
 
         inc = self.incrementalmissingrevs(common=common)
         return inc.missingancestors(heads)
 
     def findmissing(self, common=None, heads=None):
         """Return the ancestors of heads that are not ancestors of common.
 
         More specifically, return a list of nodes N such that every N
         satisfies the following constraints:
 
           1. N is an ancestor of some node in 'heads'
           2. N is not an ancestor of any node in 'common'
 
         The list is sorted by revision number, meaning it is
         topologically sorted.
 
         'heads' and 'common' are both lists of node IDs. If heads is
         not supplied, uses all of the revlog's heads. If common is not
         supplied, uses nullid."""
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()
 
         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]
 
         inc = self.incrementalmissingrevs(common=common)
         return [self.node(r) for r in inc.missingancestors(heads)]
 
     def nodesbetween(self, roots=None, heads=None):
         """Return a topological path from 'roots' to 'heads'.
 
         Return a tuple (nodes, outroots, outheads) where 'nodes' is a
         topologically sorted list of all nodes N that satisfy both of
         these constraints:
 
           1. N is a descendant of some node in 'roots'
           2. N is an ancestor of some node in 'heads'
 
         Every node is considered to be both a descendant and an ancestor
         of itself, so every reachable node in 'roots' and 'heads' will be
         included in 'nodes'.
 
         'outroots' is the list of reachable nodes in 'roots', i.e., the
         subset of 'roots' that is returned in 'nodes'. Likewise,
         'outheads' is the subset of 'heads' that is also in 'nodes'.
 
         'roots' and 'heads' are both lists of node IDs. If 'roots' is
         unspecified, uses nullid as the only root. If 'heads' is
         unspecified, uses list of all of the revlog's heads."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendant of nullid
             lowestrev = nullrev
         if (lowestrev == nullrev) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in self], [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = len(self) - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = set()
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, False)
         # Start at the top and keep marking parents until we're done.
         nodestotag = set(heads)
         # Remember where the top was so we can use it as a limit later.
         highestrev = max([self.rev(n) for n in nodestotag])
         while nodestotag:
             # grab a node to tag
             n = nodestotag.pop()
             # Never tag nullid
             if n == nullid:
                 continue
             # A node's revision number represents its place in a
             # topologically sorted list of nodes.
             r = self.rev(n)
             if r >= lowestrev:
                 if n not in ancestors:
                     # If we are possibly a descendant of one of the roots
                     # and we haven't already been marked as an ancestor
                     ancestors.add(n) # Mark as ancestor
                     # Add non-nullid parents to list of nodes to tag.
                     nodestotag.update([p for p in self.parents(n) if
                                        p != nullid])
                 elif n in heads: # We've seen it before, is it a fake head?
                     # So it is, real heads should not be the ancestors of
                     # any other heads.
                     heads.pop(n)
         if not ancestors:
             return nonodes
         # Now that we have our set of ancestors, we want to remove any
         # roots that are not ancestors.
 
         # If one of the roots was nullid, everything is included anyway.
         if lowestrev > nullrev:
             # But, since we weren't, let's recompute the lowest rev to not
             # include roots that aren't ancestors.
 
             # Filter out roots that aren't ancestors of heads
             roots = [root for root in roots if root in ancestors]
             # Recompute the lowest revision
             if roots:
                 lowestrev = min([self.rev(root) for root in roots])
             else:
                 # No more roots? Return empty list
                 return nonodes
         else:
             # We are descending from nullid, and don't need to care about
             # any other roots.
             lowestrev = nullrev
             roots = [nullid]
         # Transform our roots list into a set.
         descendants = set(roots)
957 # Also, keep the original roots so we can filter out roots that aren't
982 # Also, keep the original roots so we can filter out roots that aren't
958 # 'real' roots (i.e. are descended from other roots).
983 # 'real' roots (i.e. are descended from other roots).
959 roots = descendants.copy()
984 roots = descendants.copy()
960 # Our topologically sorted list of output nodes.
985 # Our topologically sorted list of output nodes.
961 orderedout = []
986 orderedout = []
962 # Don't start at nullid since we don't want nullid in our output list,
987 # Don't start at nullid since we don't want nullid in our output list,
963 # and if nullid shows up in descendants, empty parents will look like
988 # and if nullid shows up in descendants, empty parents will look like
964 # they're descendants.
989 # they're descendants.
965 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
990 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
966 n = self.node(r)
991 n = self.node(r)
967 isdescendant = False
992 isdescendant = False
968 if lowestrev == nullrev: # Everybody is a descendant of nullid
993 if lowestrev == nullrev: # Everybody is a descendant of nullid
969 isdescendant = True
994 isdescendant = True
970 elif n in descendants:
995 elif n in descendants:
971 # n is already a descendant
996 # n is already a descendant
972 isdescendant = True
997 isdescendant = True
973 # This check only needs to be done here because all the roots
998 # This check only needs to be done here because all the roots
974 # will start being marked is descendants before the loop.
999 # will start being marked is descendants before the loop.
975 if n in roots:
1000 if n in roots:
976 # If n was a root, check if it's a 'real' root.
1001 # If n was a root, check if it's a 'real' root.
977 p = tuple(self.parents(n))
1002 p = tuple(self.parents(n))
978 # If any of its parents are descendants, it's not a root.
1003 # If any of its parents are descendants, it's not a root.
979 if (p[0] in descendants) or (p[1] in descendants):
1004 if (p[0] in descendants) or (p[1] in descendants):
980 roots.remove(n)
1005 roots.remove(n)
981 else:
1006 else:
982 p = tuple(self.parents(n))
1007 p = tuple(self.parents(n))
983 # A node is a descendant if either of its parents are
1008 # A node is a descendant if either of its parents are
984 # descendants. (We seeded the dependents list with the roots
1009 # descendants. (We seeded the dependents list with the roots
985 # up there, remember?)
1010 # up there, remember?)
986 if (p[0] in descendants) or (p[1] in descendants):
1011 if (p[0] in descendants) or (p[1] in descendants):
987 descendants.add(n)
1012 descendants.add(n)
988 isdescendant = True
1013 isdescendant = True
989 if isdescendant and ((ancestors is None) or (n in ancestors)):
1014 if isdescendant and ((ancestors is None) or (n in ancestors)):
990 # Only include nodes that are both descendants and ancestors.
1015 # Only include nodes that are both descendants and ancestors.
991 orderedout.append(n)
1016 orderedout.append(n)
992 if (ancestors is not None) and (n in heads):
1017 if (ancestors is not None) and (n in heads):
993 # We're trying to figure out which heads are reachable
1018 # We're trying to figure out which heads are reachable
994 # from roots.
1019 # from roots.
995 # Mark this head as having been reached
1020 # Mark this head as having been reached
996 heads[n] = True
1021 heads[n] = True
997 elif ancestors is None:
1022 elif ancestors is None:
998 # Otherwise, we're trying to discover the heads.
1023 # Otherwise, we're trying to discover the heads.
999 # Assume this is a head because if it isn't, the next step
1024 # Assume this is a head because if it isn't, the next step
1000 # will eventually remove it.
1025 # will eventually remove it.
1001 heads[n] = True
1026 heads[n] = True
1002 # But, obviously its parents aren't.
1027 # But, obviously its parents aren't.
1003 for p in self.parents(n):
1028 for p in self.parents(n):
1004 heads.pop(p, None)
1029 heads.pop(p, None)
1005 heads = [head for head, flag in heads.iteritems() if flag]
1030 heads = [head for head, flag in heads.iteritems() if flag]
1006 roots = list(roots)
1031 roots = list(roots)
1007 assert orderedout
1032 assert orderedout
1008 assert roots
1033 assert roots
1009 assert heads
1034 assert heads
1010 return (orderedout, roots, heads)
1035 return (orderedout, roots, heads)
1011
1036
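    # Illustrative sketch of nodesbetween(), not part of the original
    # module: on a linear history n0 <- n1 <- n2 (hypothetical node IDs,
    # 'rl' an assumed revlog instance),
    #
    #   nodes, outroots, outheads = rl.nodesbetween([n1], [n2])
    #   # nodes == [n1, n2]; outroots == [n1]; outheads == [n2]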
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

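    # Hedged example for _headrevs(): in a DAG where revs 2 and 3 both
    # have parent 1, revs 0 and 1 get marked off through their children's
    # index entries, so the scan returns [2, 3] -- the same answer the
    # C index's headrevs() fast path would give.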
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

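    # Usage sketch for heads() (assumed revlog instance 'rl'):
    #
    #   rl.heads()               # all childless nodes
    #   rl.heads(start=node)     # only heads descending from node
    #   rl.heads(stop=[node])    # revs from node onward act as childless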
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

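    # Cost note for children() (a hedged sketch, not a guarantee): the
    # loop scans every revision after rev(node), so for a revlog with N
    # revisions the call does on the order of N - rev(node) parentrevs()
    # lookups.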
    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

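    # Hedged example tying isancestor() to commonancestorsheads(): a is
    # an ancestor of b exactly when a itself heads the common-ancestor
    # set, e.g.
    #
    #   rl.commonancestorsheads(a, b) == [a]   # implies rl.isancestor(a, b)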
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

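    # Illustrative forms accepted by lookup() (hypothetical values):
    #
    #   rl.lookup(5)           # revision number
    #   rl.lookup('5')         # str(revision number)
    #   rl.lookup(node)        # 20-byte binary nodeid
    #   rl.lookup('1bde8e8e')  # unambiguous hex prefix
    #
    # An ambiguous prefix raises LookupError('ambiguous identifier').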
    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

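    # Sketch of the probing loop in shortest(): start at max(6, minlength),
    # grow the prefix until it becomes unambiguous, otherwise shrink it
    # while it stays valid. For a hypothetical node unique in 4 hex digits
    # and minlength=1, the probes would be lengths 6, 5, 4, 3, returning
    # the 4-digit prefix -- the last one that was still valid.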
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

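    # Hedged sketch of the window arithmetic in _readsegment(), assuming
    # the power-of-two chunk cache size the revlog enforces elsewhere:
    #
    #   cachesize = 65536
    #   offset, length = 70000, 100
    #   realoffset = 70000 & ~65535                             # 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536   # 65536
    #
    # i.e. one aligned read of [65536, 131072) covers [70000, 70100).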
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

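    # Hedged illustration of the trailing-empty-rev skip in _chunks():
    # if a sliced chunk is [5, 6, 7] and revs 6 and 7 have zero-length
    # deltas, the reversed scan stops at 5:
    #
    #   for lastrev in [7, 6, 5]:       # revschunk[::-1]
    #       if length(lastrev) != 0:    # first non-empty rev wins
    #           break
    #
    # so a single _getsegmentforrevs(5, 5) read serves the whole chunk.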
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

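    # Usage sketch for revision() (assumed instance 'rl'): both calls
    # below yield the text of rev 5; raw=True additionally skips the
    # flag-processor 'read' transforms and returns the stored rawtext,
    # which is what changegroup generation wants:
    #
    #   rl.revision(rl.node(5))
    #   rl.revision(5, raw=True)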
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

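    # Hedged sketch of the processor tuple consumed by _processflags();
    # the registration helper named below is an assumption about the
    # module-level API, and REVIDX_SOMEFLAG is hypothetical:
    #
    #   def readxform(rl, text): return text, True    # (text, validatehash)
    #   def writexform(rl, text): return text, True
    #   def rawxform(rl, rawtext): return True        # validatehash only
    #   addflagprocessor(REVIDX_SOMEFLAG,
    #                    (readxform, writexform, rawxform))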
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~FLAG_INLINE_DATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

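    # Minimal usage sketch for addrevision(), assuming a transaction
    # 'tr', a linkrev 'linkrev' and parent nodes p1/p2 (all hypothetical):
    #
    #   node = rl.addrevision(text, tr, linkrev, p1, p2)
    #
    # With flags set, text is first run through the 'write' transforms
    # and the resulting rawtext is what gets stored and hashed.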
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

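    # Dispatch summary for the branches above (informal): 'x' routes to
    # zlib via the aliased _zlibdecompress, '\0' means the chunk is stored
    # plain and returned as-is, 'u' marks uncompressed data with a
    # one-byte marker to strip, and any other header byte resolves an
    # engine once through util.compengines.forrevlogheader() and memoizes
    # it in self._decompressors.
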
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = dist # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (dist > maxdist or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

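    # A worked example of the bounds above (illustrative numbers): with
    # textlen = 1000 and _maxdeltachainspan unset, maxdist becomes
    # max(dist, 4000) and the span check can never fire, so only the size
    # and chain-length checks apply; a delta with l = 1200 (> textlen) or
    # compresseddeltalen = 2500 (> 2 * textlen) is rejected, as is any
    # chain longer than a configured _maxchainlen.
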
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

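        # A note on the special case in buildtext() above (a sketch based
        # on mdiff's encoding, stated as an assumption rather than read
        # from this file): replacediffheader(oldlen, newlen) packs a
        # single ">lll" hunk header covering the whole base text, so a
        # delta that starts with it replaces the entire base and the new
        # fulltext is simply the delta with that 12-byte header stripped.
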
        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta() will reuse the cache.
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily-tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # the other approaches failed; try against prev to hopefully
                # save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

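    # Recap of the delta selection above (informal): try the cached delta
    # from the incoming changegroup first (when generaldelta and
    # lazydeltabase allow it), then the closest parent -- or both parents
    # with aggressivemergedeltas -- then ``prev``, and finally fall back
    # to storing a fulltext when no candidate satisfies _isgooddelta().
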
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

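    # Layout note for the inline branch above: inline revlogs interleave
    # each index entry with its data chunk in the .i file, hence
    # ``offset += curr * self._io.size``. Illustrative arithmetic,
    # assuming the usual 64-byte v1 entry size: with curr == 3, the new
    # entry lands at 3 * 64 bytes of index entries plus all preceding
    # chunk bytes.
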
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional;
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

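    # Usage sketch (``deltaiter``, ``cl`` and ``onrev`` are illustrative
    # names, not part of this module): each element of ``deltas`` is a
    # 7-tuple (node, p1, p2, linknode, deltabase, delta, flags), and
    # ``linkmapper`` turns a linknode into the local linkrev, so applying
    # a changegroup chunk might look like:
    #
    #   added = filelog.addgroup(deltaiter, cl.rev, tr,
    #                            addrevisioncb=onrev)
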
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

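    # Example walk (hypothetical linkrevs): revs 0..3 where rev 0 has
    # linkrev 0, rev 1 linkrev 1, rev 2 linkrev 4, and a second head
    # rev 3 with linkrev 2. With minlink = 3 the loop yields
    # strippoint = 2 and brokenrevs = {3}: truncating at rev 2 also
    # removes rev 3 even though its linkrev 2 would survive the strip,
    # so the caller must re-add it afterwards.
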
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

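    # Illustrative arithmetic for the truncation points registered above:
    # if getstrippoint() returns rev == 5, a non-inline revlog marks the
    # data file at self.start(5) and the index at 5 * self._io.size,
    # while an inline revlog marks the single .i file at
    # self.start(5) + 5 * self._io.size.
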
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When ``None``, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
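
    # A minimal usage sketch for clone() (``repo``, ``oldlog`` and
    # ``newlog`` are illustrative names, not part of this module):
    # copying ``oldlog`` into an empty ``newlog`` while recomputing every
    # delta, inside a transaction:
    #
    #   tr = repo.transaction('upgrade')
    #   try:
    #       oldlog.clone(tr, newlog, deltareuse=oldlog.DELTAREUSENEVER)
    #       tr.close()
    #   finally:
    #       tr.release()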