upstream/mercurial-mirror Commit - r34881:9e18ab7f

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""Storage back-end for Mercurial.

8

"""Storage back-end for Mercurial.

9

10

This provides efficient delta storage with O(1) retrieve and append

10

This provides efficient delta storage with O(1) retrieve and append

11

and O(changes) merge between branches.

11

and O(changes) merge between branches.

12

"""

12

"""

13

14

from __future__ import absolute_import

14

from __future__ import absolute_import

15

16

import binascii

16

import binascii

17

import collections

17

import collections

18

import errno

18

import errno

19

import hashlib

19

import hashlib

20

import heapq

20

import os

21

import os

21

import struct

22

import struct

22

import zlib

23

import zlib

23

24

# import stuff from node for others to import from revlog

25

# import stuff from node for others to import from revlog

25

from .node import (

26

from .node import (

26

bin,

27

bin,

27

hex,

28

hex,

28

nullid,

29

nullid,

29

nullrev,

30

nullrev,

30

wdirhex,

31

wdirhex,

31

wdirid,

32

wdirid,

32

wdirrev,

33

wdirrev,

33

)

34

)

34

from .i18n import _

35

from .i18n import _

35

from . import (

36

from . import (

36

ancestor,

37

ancestor,

37

error,

38

error,

38

mdiff,

39

mdiff,

39

policy,

40

policy,

40

pycompat,

41

pycompat,

41

templatefilters,

42

templatefilters,

42

util,

43

util,

43

)

44

)

44

45

parsers = policy.importmod(r'parsers')

46

parsers = policy.importmod(r'parsers')

46

47

# Aliased for performance.

48

# Aliased for performance.

48

_zlibdecompress = zlib.decompress

49

_zlibdecompress = zlib.decompress

49

50

# revlog header flags

51

# revlog header flags

51

REVLOGV0 = 0

52

REVLOGV0 = 0

52

REVLOGV1 = 1

53

REVLOGV1 = 1

53

# Dummy value until file format is finalized.

54

# Dummy value until file format is finalized.

54

# Reminder: change the bounds check in revlog.__init__ when this is changed.

55

# Reminder: change the bounds check in revlog.__init__ when this is changed.

55

REVLOGV2 = 0xDEAD

56

REVLOGV2 = 0xDEAD

56

FLAG_INLINE_DATA = (1 << 16)

57

FLAG_INLINE_DATA = (1 << 16)

57

FLAG_GENERALDELTA = (1 << 17)

58

FLAG_GENERALDELTA = (1 << 17)

58

REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA

59

REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA

59

REVLOG_DEFAULT_FORMAT = REVLOGV1

60

REVLOG_DEFAULT_FORMAT = REVLOGV1

60

REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

61

REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

61

REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA

62

REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA

62

REVLOGV2_FLAGS = REVLOGV1_FLAGS

63

REVLOGV2_FLAGS = REVLOGV1_FLAGS

63

64

# revlog index flags

65

# revlog index flags

65

REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified

66

REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified

66

REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)

67

REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)

67

REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally

68

REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally

68

REVIDX_DEFAULT_FLAGS = 0

69

REVIDX_DEFAULT_FLAGS = 0

69

# stable order in which flags need to be processed and their processors applied

70

# stable order in which flags need to be processed and their processors applied

70

REVIDX_FLAGS_ORDER = [

71

REVIDX_FLAGS_ORDER = [

71

REVIDX_ISCENSORED,

72

REVIDX_ISCENSORED,

72

REVIDX_ELLIPSIS,

73

REVIDX_ELLIPSIS,

73

REVIDX_EXTSTORED,

74

REVIDX_EXTSTORED,

74

]

75

]

75

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

76

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

76

77

# max size of revlog with inline data

78

# max size of revlog with inline data

78

_maxinline = 131072

79

_maxinline = 131072

79

_chunksize = 1048576

80

_chunksize = 1048576

80

81

RevlogError = error.RevlogError

82

RevlogError = error.RevlogError

82

LookupError = error.LookupError

83

LookupError = error.LookupError

83

CensoredNodeError = error.CensoredNodeError

84

CensoredNodeError = error.CensoredNodeError

84

ProgrammingError = error.ProgrammingError

85

ProgrammingError = error.ProgrammingError

85

86

# Store flag processors (cf. 'addflagprocessor()' to register)

87

# Store flag processors (cf. 'addflagprocessor()' to register)

87

_flagprocessors = {

88

_flagprocessors = {

88

REVIDX_ISCENSORED: None,

89

REVIDX_ISCENSORED: None,

89

}

90

}

90

91

def addflagprocessor(flag, processor):

92

def addflagprocessor(flag, processor):

92

"""Register a flag processor on a revision data flag.

93

"""Register a flag processor on a revision data flag.

93

94

Invariant:

95

Invariant:

95

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.

96

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.

96

- Only one flag processor can be registered on a specific flag.

97

- Only one flag processor can be registered on a specific flag.

97

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

98

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

98

following signatures:

99

following signatures:

99

- (read) f(self, rawtext) -> text, bool

100

- (read) f(self, rawtext) -> text, bool

100

- (write) f(self, text) -> rawtext, bool

101

- (write) f(self, text) -> rawtext, bool

101

- (raw) f(self, rawtext) -> bool

102

- (raw) f(self, rawtext) -> bool

102

"text" is presented to the user. "rawtext" is stored in revlog data, not

103

"text" is presented to the user. "rawtext" is stored in revlog data, not

103

directly visible to the user.

104

directly visible to the user.

104

The boolean returned by these transforms is used to determine whether

105

The boolean returned by these transforms is used to determine whether

105

the returned text can be used for hash integrity checking. For example,

106

the returned text can be used for hash integrity checking. For example,

106

if "write" returns False, then "text" is used to generate hash. If

107

if "write" returns False, then "text" is used to generate hash. If

107

"write" returns True, that basically means "rawtext" returned by "write"

108

"write" returns True, that basically means "rawtext" returned by "write"

108

should be used to generate hash. Usually, "write" and "read" return

109

should be used to generate hash. Usually, "write" and "read" return

109

different booleans. And "raw" returns a same boolean as "write".

110

different booleans. And "raw" returns a same boolean as "write".

110

111

Note: The 'raw' transform is used for changegroup generation and in some

112

Note: The 'raw' transform is used for changegroup generation and in some

112

debug commands. In this case the transform only indicates whether the

113

debug commands. In this case the transform only indicates whether the

113

contents can be used for hash integrity checks.

114

contents can be used for hash integrity checks.

114

"""

115

"""

115

if not flag & REVIDX_KNOWN_FLAGS:

116

if not flag & REVIDX_KNOWN_FLAGS:

116

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

117

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

117

raise ProgrammingError(msg)

118

raise ProgrammingError(msg)

118

if flag not in REVIDX_FLAGS_ORDER:

119

if flag not in REVIDX_FLAGS_ORDER:

119

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

120

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

120

raise ProgrammingError(msg)

121

raise ProgrammingError(msg)

121

if flag in _flagprocessors:

122

if flag in _flagprocessors:

122

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

123

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

123

raise error.Abort(msg)

124

raise error.Abort(msg)

124

_flagprocessors[flag] = processor

125

_flagprocessors[flag] = processor

125

126

def getoffset(q):

127

def getoffset(q):

127

return int(q >> 16)

128

return int(q >> 16)

128

129

def gettype(q):

130

def gettype(q):

130

return int(q & 0xFFFF)

131

return int(q & 0xFFFF)

131

132

def offset_type(offset, type):

133

def offset_type(offset, type):

133

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

134

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

134

raise ValueError('unknown revlog index flags')

135

raise ValueError('unknown revlog index flags')

135

return int(int(offset) << 16 | type)

136

return int(int(offset) << 16 | type)

136

137

_nullhash = hashlib.sha1(nullid)

138

_nullhash = hashlib.sha1(nullid)

138

139

def hash(text, p1, p2):

140

def hash(text, p1, p2):

140

"""generate a hash from the given text and its parent hashes

141

"""generate a hash from the given text and its parent hashes

141

142

This hash combines both the current file contents and its history

143

This hash combines both the current file contents and its history

143

in a manner that makes it easy to distinguish nodes with the same

144

in a manner that makes it easy to distinguish nodes with the same

144

content in the revision graph.

145

content in the revision graph.

145

"""

146

"""

146

# As of now, if one of the parent node is null, p2 is null

147

# As of now, if one of the parent node is null, p2 is null

147

if p2 == nullid:

148

if p2 == nullid:

148

# deep copy of a hash is faster than creating one

149

# deep copy of a hash is faster than creating one

149

s = _nullhash.copy()

150

s = _nullhash.copy()

150

s.update(p1)

151

s.update(p1)

151

else:

152

else:

152

# none of the parent nodes are nullid

153

# none of the parent nodes are nullid

153

if p1 < p2:

154

if p1 < p2:

154

a = p1

155

a = p1

155

b = p2

156

b = p2

156

else:

157

else:

157

a = p2

158

a = p2

158

b = p1

159

b = p1

159

s = hashlib.sha1(a)

160

s = hashlib.sha1(a)

160

s.update(b)

161

s.update(b)

161

s.update(text)

162

s.update(text)

162

return s.digest()

163

return s.digest()

163

164

def _slicechunk(revlog, revs):

165

def _slicechunk(revlog, revs):

165

"""slice revs to reduce the amount of unrelated data to be read from disk.

166

"""slice revs to reduce the amount of unrelated data to be read from disk.

166

167

``revs`` is sliced into groups that should be read in one time.

168

``revs`` is sliced into groups that should be read in one time.

168

Assume that revs are sorted.

169

Assume that revs are sorted.

169

"""

170

"""

170

start = revlog.start

171

start = revlog.start

171

length = revlog.length

172

length = revlog.length

172

173

chunkqueue = collections.deque()

174

if len(revs) <= 1:

174

chunkqueue.append((revs, 0))

175

yield revs

175

176

return

176

while chunkqueue:

177

revs, depth = chunkqueue.popleft()

178

177

179

startbyte = start(revs[0])

178

startbyte = start(revs[0])

180

endbyte = start(revs[-1]) + length(revs[-1])

179

endbyte = start(revs[-1]) + length(revs[-1])

181

deltachainspan = endbyte - startbyte

180

readdata = deltachainspan = endbyte - startbyte

182

181

183

if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:

182

chainpayload = sum(length(r) for r in revs)

184

yield revs

185

continue

186

183

187

# Find where is the largest hole (this is where we would split) and

184

if deltachainspan:

188

# sum up the lengths of useful data to compute the density of the span

185

density = chainpayload / float(deltachainspan)

189

textlen = 0

186

else:

187

density = 1.0

188

189

# Store the gaps in a heap to have them sorted by decreasing size

190

gapsheap = []

191

heapq.heapify(gapsheap)

190

prevend = None

192

prevend = None

191

largesthole = 0

192

idxlargesthole = -1

193

for i, rev in enumerate(revs):

193

for i, rev in enumerate(revs):

194

revstart = start(rev)

194

revstart = start(rev)

195

revlen = length(rev)

195

revlen = length(rev)

196

197

if prevend is not None:

197

if prevend is not None:

198

~~hol~~e = revstart - prevend

198

gapsize = revstart - prevend

199

if ~~hole~~ > ~~largesthol~~e:

199

if gapsize:

200

largesthole = hole

200

heapq.heappush(gapsheap, (-gapsize, i))

201

idxlargesthole = i

202

201

203

textlen += revlen

204

prevend = revstart + revlen

202

prevend = revstart + revlen

205

203

206

density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0

204

# Collect the indices of the largest holes until the density is acceptable

205

indicesheap = []

206

heapq.heapify(indicesheap)

207

while gapsheap and density < revlog._srdensitythreshold:

208

oppgapsize, gapidx = heapq.heappop(gapsheap)

209

210

heapq.heappush(indicesheap, gapidx)

207

211

208

if density > revlog._srdensitythreshold:

212

# the gap sizes are stored as negatives to be sorted decreasingly

209

yield revs

213

# by the heap

210

continue

214

readdata -= (-oppgapsize)

215

if readdata > 0:

216

density = chainpayload / float(readdata)

217

else:

218

density = 1.0

211

219

212

# Add the left and right parts so that they will be sliced

220

# Cut the revs at collected indices

213

# recursively too

221

previdx = 0

214

chunkqueue.append((revs[:idxlargesthole], depth + 1))

222

while indicesheap:

215

chunkqueue.append((revs[idxlargesthole:], depth + 1))

223

idx = heapq.heappop(indicesheap)

224

yield revs[previdx:idx]

225

previdx = idx

226

yield revs[previdx:]

216

227

217

# index v0:

228

# index v0:

218

# 4 bytes: offset

229

# 4 bytes: offset

219

# 4 bytes: compressed length

230

# 4 bytes: compressed length

220

# 4 bytes: base rev

231

# 4 bytes: base rev

221

# 4 bytes: link rev

232

# 4 bytes: link rev

222

# 20 bytes: parent 1 nodeid

233

# 20 bytes: parent 1 nodeid

223

# 20 bytes: parent 2 nodeid

234

# 20 bytes: parent 2 nodeid

224

# 20 bytes: nodeid

235

# 20 bytes: nodeid

225

indexformatv0 = struct.Struct(">4l20s20s20s")

236

indexformatv0 = struct.Struct(">4l20s20s20s")

226

indexformatv0_pack = indexformatv0.pack

237

indexformatv0_pack = indexformatv0.pack

227

indexformatv0_unpack = indexformatv0.unpack

238

indexformatv0_unpack = indexformatv0.unpack

228

239

229

class revlogoldio(object):

240

class revlogoldio(object):

230

def __init__(self):

241

def __init__(self):

231

self.size = indexformatv0.size

242

self.size = indexformatv0.size

232

243

233

def parseindex(self, data, inline):

244

def parseindex(self, data, inline):

234

s = self.size

245

s = self.size

235

index = []

246

index = []

236

nodemap = {nullid: nullrev}

247

nodemap = {nullid: nullrev}

237

n = off = 0

248

n = off = 0

238

l = len(data)

249

l = len(data)

239

while off + s <= l:

250

while off + s <= l:

240

cur = data[off:off + s]

251

cur = data[off:off + s]

241

off += s

252

off += s

242

e = indexformatv0_unpack(cur)

253

e = indexformatv0_unpack(cur)

243

# transform to revlogv1 format

254

# transform to revlogv1 format

244

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

255

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

245

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

256

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

246

index.append(e2)

257

index.append(e2)

247

nodemap[e[6]] = n

258

nodemap[e[6]] = n

248

n += 1

259

n += 1

249

260

250

# add the magic null revision at -1

261

# add the magic null revision at -1

251

index.append((0, 0, 0, -1, -1, -1, -1, nullid))

262

index.append((0, 0, 0, -1, -1, -1, -1, nullid))

252

263

253

return index, nodemap, None

264

return index, nodemap, None

254

265

255

def packentry(self, entry, node, version, rev):

266

def packentry(self, entry, node, version, rev):

256

if gettype(entry[0]):

267

if gettype(entry[0]):

257

raise RevlogError(_('index entry flags need revlog version 1'))

268

raise RevlogError(_('index entry flags need revlog version 1'))

258

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

269

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

259

node(entry[5]), node(entry[6]), entry[7])

270

node(entry[5]), node(entry[6]), entry[7])

260

return indexformatv0_pack(*e2)

271

return indexformatv0_pack(*e2)

261

272

262

# index ng:

273

# index ng:

263

# 6 bytes: offset

274

# 6 bytes: offset

264

# 2 bytes: flags

275

# 2 bytes: flags

265

# 4 bytes: compressed length

276

# 4 bytes: compressed length

266

# 4 bytes: uncompressed length

277

# 4 bytes: uncompressed length

267

# 4 bytes: base rev

278

# 4 bytes: base rev

268

# 4 bytes: link rev

279

# 4 bytes: link rev

269

# 4 bytes: parent 1 rev

280

# 4 bytes: parent 1 rev

270

# 4 bytes: parent 2 rev

281

# 4 bytes: parent 2 rev

271

# 32 bytes: nodeid

282

# 32 bytes: nodeid

272

indexformatng = struct.Struct(">Qiiiiii20s12x")

283

indexformatng = struct.Struct(">Qiiiiii20s12x")

273

indexformatng_pack = indexformatng.pack

284

indexformatng_pack = indexformatng.pack

274

versionformat = struct.Struct(">I")

285

versionformat = struct.Struct(">I")

275

versionformat_pack = versionformat.pack

286

versionformat_pack = versionformat.pack

276

versionformat_unpack = versionformat.unpack

287

versionformat_unpack = versionformat.unpack

277

288

278

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

289

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

279

# signed integer)

290

# signed integer)

280

_maxentrysize = 0x7fffffff

291

_maxentrysize = 0x7fffffff

281

292

282

class revlogio(object):

293

class revlogio(object):

283

def __init__(self):

294

def __init__(self):

284

self.size = indexformatng.size

295

self.size = indexformatng.size

285

296

286

def parseindex(self, data, inline):

297

def parseindex(self, data, inline):

287

# call the C implementation to parse the index data

298

# call the C implementation to parse the index data

288

index, cache = parsers.parse_index2(data, inline)

299

index, cache = parsers.parse_index2(data, inline)

289

return index, getattr(index, 'nodemap', None), cache

300

return index, getattr(index, 'nodemap', None), cache

290

301

291

def packentry(self, entry, node, version, rev):

302

def packentry(self, entry, node, version, rev):

292

p = indexformatng_pack(*entry)

303

p = indexformatng_pack(*entry)

293

if rev == 0:

304

if rev == 0:

294

p = versionformat_pack(version) + p[4:]

305

p = versionformat_pack(version) + p[4:]

295

return p

306

return p

296

307

297

class revlog(object):

308

class revlog(object):

298

"""

309

"""

299

the underlying revision storage object

310

the underlying revision storage object

300

311

301

A revlog consists of two parts, an index and the revision data.

312

A revlog consists of two parts, an index and the revision data.

302

313

303

The index is a file with a fixed record size containing

314

The index is a file with a fixed record size containing

304

information on each revision, including its nodeid (hash), the

315

information on each revision, including its nodeid (hash), the

305

nodeids of its parents, the position and offset of its data within

316

nodeids of its parents, the position and offset of its data within

306

the data file, and the revision it's based on. Finally, each entry

317

the data file, and the revision it's based on. Finally, each entry

307

contains a linkrev entry that can serve as a pointer to external

318

contains a linkrev entry that can serve as a pointer to external

308

data.

319

data.

309

320

310

The revision data itself is a linear collection of data chunks.

321

The revision data itself is a linear collection of data chunks.

311

Each chunk represents a revision and is usually represented as a

322

Each chunk represents a revision and is usually represented as a

312

delta against the previous chunk. To bound lookup time, runs of

323

delta against the previous chunk. To bound lookup time, runs of

313

deltas are limited to about 2 times the length of the original

324

deltas are limited to about 2 times the length of the original

314

version data. This makes retrieval of a version proportional to

325

version data. This makes retrieval of a version proportional to

315

its size, or O(1) relative to the number of revisions.

326

its size, or O(1) relative to the number of revisions.

316

327

317

Both pieces of the revlog are written to in an append-only

328

Both pieces of the revlog are written to in an append-only

318

fashion, which means we never need to rewrite a file to insert or

329

fashion, which means we never need to rewrite a file to insert or

319

remove data, and can use some simple techniques to avoid the need

330

remove data, and can use some simple techniques to avoid the need

320

for locking while reading.

331

for locking while reading.

321

332

322

If checkambig, indexfile is opened with checkambig=True at

333

If checkambig, indexfile is opened with checkambig=True at

323

writing, to avoid file stat ambiguity.

334

writing, to avoid file stat ambiguity.

324

335

325

If mmaplargeindex is True, and an mmapindexthreshold is set, the

336

If mmaplargeindex is True, and an mmapindexthreshold is set, the

326

index will be mmapped rather than read if it is larger than the

337

index will be mmapped rather than read if it is larger than the

327

configured threshold.

338

configured threshold.

328

"""

339

"""

329

def __init__(self, opener, indexfile, datafile=None, checkambig=False,

340

def __init__(self, opener, indexfile, datafile=None, checkambig=False,

330

mmaplargeindex=False):

341

mmaplargeindex=False):

331

"""

342

"""

332

create a revlog object

343

create a revlog object

333

344

334

opener is a function that abstracts the file opening operation

345

opener is a function that abstracts the file opening operation

335

and can be used to implement COW semantics or the like.

346

and can be used to implement COW semantics or the like.

336

"""

347

"""

337

self.indexfile = indexfile

348

self.indexfile = indexfile

338

self.datafile = datafile or (indexfile[:-2] + ".d")

349

self.datafile = datafile or (indexfile[:-2] + ".d")

339

self.opener = opener

350

self.opener = opener

340

# When True, indexfile is opened with checkambig=True at writing, to

351

# When True, indexfile is opened with checkambig=True at writing, to

341

# avoid file stat ambiguity.

352

# avoid file stat ambiguity.

342

self._checkambig = checkambig

353

self._checkambig = checkambig

343

# 3-tuple of (node, rev, text) for a raw revision.

354

# 3-tuple of (node, rev, text) for a raw revision.

344

self._cache = None

355

self._cache = None

345

# Maps rev to chain base rev.

356

# Maps rev to chain base rev.

346

self._chainbasecache = util.lrucachedict(100)

357

self._chainbasecache = util.lrucachedict(100)

347

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

358

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

348

self._chunkcache = (0, '')

359

self._chunkcache = (0, '')

349

# How much data to read and cache into the raw revlog data cache.

360

# How much data to read and cache into the raw revlog data cache.

350

self._chunkcachesize = 65536

361

self._chunkcachesize = 65536

351

self._maxchainlen = None

362

self._maxchainlen = None

352

self._aggressivemergedeltas = False

363

self._aggressivemergedeltas = False

353

self.index = []

364

self.index = []

354

# Mapping of partial identifiers to full nodes.

365

# Mapping of partial identifiers to full nodes.

355

self._pcache = {}

366

self._pcache = {}

356

# Mapping of revision integer to full node.

367

# Mapping of revision integer to full node.

357

self._nodecache = {nullid: nullrev}

368

self._nodecache = {nullid: nullrev}

358

self._nodepos = None

369

self._nodepos = None

359

self._compengine = 'zlib'

370

self._compengine = 'zlib'

360

self._maxdeltachainspan = -1

371

self._maxdeltachainspan = -1

361

self._withsparseread = False

372

self._withsparseread = False

362

self._srdensitythreshold = 0.25

373

self._srdensitythreshold = 0.25

363

self._srminblocksize = 262144

374

self._srminblocksize = 262144

364

375

365

mmapindexthreshold = None

376

mmapindexthreshold = None

366

v = REVLOG_DEFAULT_VERSION

377

v = REVLOG_DEFAULT_VERSION

367

opts = getattr(opener, 'options', None)

378

opts = getattr(opener, 'options', None)

368

if opts is not None:

379

if opts is not None:

369

if 'revlogv2' in opts:

380

if 'revlogv2' in opts:

370

# version 2 revlogs always use generaldelta.

381

# version 2 revlogs always use generaldelta.

371

v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA

382

v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA

372

elif 'revlogv1' in opts:

383

elif 'revlogv1' in opts:

373

if 'generaldelta' in opts:

384

if 'generaldelta' in opts:

374

v |= FLAG_GENERALDELTA

385

v |= FLAG_GENERALDELTA

375

else:

386

else:

376

v = 0

387

v = 0

377

if 'chunkcachesize' in opts:

388

if 'chunkcachesize' in opts:

378

self._chunkcachesize = opts['chunkcachesize']

389

self._chunkcachesize = opts['chunkcachesize']

379

if 'maxchainlen' in opts:

390

if 'maxchainlen' in opts:

380

self._maxchainlen = opts['maxchainlen']

391

self._maxchainlen = opts['maxchainlen']

381

if 'aggressivemergedeltas' in opts:

392

if 'aggressivemergedeltas' in opts:

382

self._aggressivemergedeltas = opts['aggressivemergedeltas']

393

self._aggressivemergedeltas = opts['aggressivemergedeltas']

383

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

394

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

384

if 'compengine' in opts:

395

if 'compengine' in opts:

385

self._compengine = opts['compengine']

396

self._compengine = opts['compengine']

386

if 'maxdeltachainspan' in opts:

397

if 'maxdeltachainspan' in opts:

387

self._maxdeltachainspan = opts['maxdeltachainspan']

398

self._maxdeltachainspan = opts['maxdeltachainspan']

388

if mmaplargeindex and 'mmapindexthreshold' in opts:

399

if mmaplargeindex and 'mmapindexthreshold' in opts:

389

mmapindexthreshold = opts['mmapindexthreshold']

400

mmapindexthreshold = opts['mmapindexthreshold']

390

self._withsparseread = bool(opts.get('with-sparse-read', False))

401

self._withsparseread = bool(opts.get('with-sparse-read', False))

391

if 'sparse-read-density-threshold' in opts:

402

if 'sparse-read-density-threshold' in opts:

392

self._srdensitythreshold = opts['sparse-read-density-threshold']

403

self._srdensitythreshold = opts['sparse-read-density-threshold']

393

if 'sparse-read-min-block-size' in opts:

404

if 'sparse-read-min-block-size' in opts:

394

self._srminblocksize = opts['sparse-read-min-block-size']

405

self._srminblocksize = opts['sparse-read-min-block-size']

395

406

396

if self._chunkcachesize <= 0:

407

if self._chunkcachesize <= 0:

397

raise RevlogError(_('revlog chunk cache size %r is not greater '

408

raise RevlogError(_('revlog chunk cache size %r is not greater '

398

'than 0') % self._chunkcachesize)

409

'than 0') % self._chunkcachesize)

399

elif self._chunkcachesize & (self._chunkcachesize - 1):

410

elif self._chunkcachesize & (self._chunkcachesize - 1):

400

raise RevlogError(_('revlog chunk cache size %r is not a power '

411

raise RevlogError(_('revlog chunk cache size %r is not a power '

401

'of 2') % self._chunkcachesize)

412

'of 2') % self._chunkcachesize)

402

413

403

indexdata = ''

414

indexdata = ''

404

self._initempty = True

415

self._initempty = True

405

try:

416

try:

406

f = self.opener(self.indexfile)

417

f = self.opener(self.indexfile)

407

if (mmapindexthreshold is not None and

418

if (mmapindexthreshold is not None and

408

self.opener.fstat(f).st_size >= mmapindexthreshold):

419

self.opener.fstat(f).st_size >= mmapindexthreshold):

409

indexdata = util.buffer(util.mmapread(f))

420

indexdata = util.buffer(util.mmapread(f))

410

else:

421

else:

411

indexdata = f.read()

422

indexdata = f.read()

412

f.close()

423

f.close()

413

if len(indexdata) > 0:

424

if len(indexdata) > 0:

414

v = versionformat_unpack(indexdata[:4])[0]

425

v = versionformat_unpack(indexdata[:4])[0]

415

self._initempty = False

426

self._initempty = False

416

except IOError as inst:

427

except IOError as inst:

417

if inst.errno != errno.ENOENT:

428

if inst.errno != errno.ENOENT:

418

raise

429

raise

419

430

420

self.version = v

431

self.version = v

421

self._inline = v & FLAG_INLINE_DATA

432

self._inline = v & FLAG_INLINE_DATA

422

self._generaldelta = v & FLAG_GENERALDELTA

433

self._generaldelta = v & FLAG_GENERALDELTA

423

flags = v & ~0xFFFF

434

flags = v & ~0xFFFF

424

fmt = v & 0xFFFF

435

fmt = v & 0xFFFF

425

if fmt == REVLOGV0:

436

if fmt == REVLOGV0:

426

if flags:

437

if flags:

427

raise RevlogError(_('unknown flags (%#04x) in version %d '

438

raise RevlogError(_('unknown flags (%#04x) in version %d '

428

'revlog %s') %

439

'revlog %s') %

429

(flags >> 16, fmt, self.indexfile))

440

(flags >> 16, fmt, self.indexfile))

430

elif fmt == REVLOGV1:

441

elif fmt == REVLOGV1:

431

if flags & ~REVLOGV1_FLAGS:

442

if flags & ~REVLOGV1_FLAGS:

432

raise RevlogError(_('unknown flags (%#04x) in version %d '

443

raise RevlogError(_('unknown flags (%#04x) in version %d '

433

'revlog %s') %

444

'revlog %s') %

434

(flags >> 16, fmt, self.indexfile))

445

(flags >> 16, fmt, self.indexfile))

435

elif fmt == REVLOGV2:

446

elif fmt == REVLOGV2:

436

if flags & ~REVLOGV2_FLAGS:

447

if flags & ~REVLOGV2_FLAGS:

437

raise RevlogError(_('unknown flags (%#04x) in version %d '

448

raise RevlogError(_('unknown flags (%#04x) in version %d '

438

'revlog %s') %

449

'revlog %s') %

439

(flags >> 16, fmt, self.indexfile))

450

(flags >> 16, fmt, self.indexfile))

440

else:

451

else:

441

raise RevlogError(_('unknown version (%d) in revlog %s') %

452

raise RevlogError(_('unknown version (%d) in revlog %s') %

442

(fmt, self.indexfile))

453

(fmt, self.indexfile))

443

454

444

self.storedeltachains = True

455

self.storedeltachains = True

445

456

446

self._io = revlogio()

457

self._io = revlogio()

447

if self.version == REVLOGV0:

458

if self.version == REVLOGV0:

448

self._io = revlogoldio()

459

self._io = revlogoldio()

449

try:

460

try:

450

d = self._io.parseindex(indexdata, self._inline)

461

d = self._io.parseindex(indexdata, self._inline)

451

except (ValueError, IndexError):

462

except (ValueError, IndexError):

452

raise RevlogError(_("index %s is corrupted") % (self.indexfile))

463

raise RevlogError(_("index %s is corrupted") % (self.indexfile))

453

self.index, nodemap, self._chunkcache = d

464

self.index, nodemap, self._chunkcache = d

454

if nodemap is not None:

465

if nodemap is not None:

455

self.nodemap = self._nodecache = nodemap

466

self.nodemap = self._nodecache = nodemap

456

if not self._chunkcache:

467

if not self._chunkcache:

457

self._chunkclear()

468

self._chunkclear()

458

# revnum -> (chain-length, sum-delta-length)

469

# revnum -> (chain-length, sum-delta-length)

459

self._chaininfocache = {}

470

self._chaininfocache = {}

460

# revlog header -> revlog compressor

471

# revlog header -> revlog compressor

461

self._decompressors = {}

472

self._decompressors = {}

462

473

463

@util.propertycache

474

@util.propertycache

464

def _compressor(self):

475

def _compressor(self):

465

return util.compengines[self._compengine].revlogcompressor()

476

return util.compengines[self._compengine].revlogcompressor()

466

477

467

def tip(self):

478

def tip(self):

468

return self.node(len(self.index) - 2)

479

return self.node(len(self.index) - 2)

469

def __contains__(self, rev):

480

def __contains__(self, rev):

470

return 0 <= rev < len(self)

481

return 0 <= rev < len(self)

471

def __len__(self):

482

def __len__(self):

472

return len(self.index) - 1

483

return len(self.index) - 1

473

def __iter__(self):

484

def __iter__(self):

474

return iter(xrange(len(self)))

485

return iter(xrange(len(self)))

475

def revs(self, start=0, stop=None):

486

def revs(self, start=0, stop=None):

476

"""iterate over all rev in this revlog (from start to stop)"""

487

"""iterate over all rev in this revlog (from start to stop)"""

477

step = 1

488

step = 1

478

if stop is not None:

489

if stop is not None:

479

if start > stop:

490

if start > stop:

480

step = -1

491

step = -1

481

stop += step

492

stop += step

482

else:

493

else:

483

stop = len(self)

494

stop = len(self)

484

return xrange(start, stop, step)

495

return xrange(start, stop, step)

485

496

486

@util.propertycache

497

@util.propertycache

487

def nodemap(self):

498

def nodemap(self):

488

self.rev(self.node(0))

499

self.rev(self.node(0))

489

return self._nodecache

500

return self._nodecache

490

501

491

def hasnode(self, node):

502

def hasnode(self, node):

492

try:

503

try:

493

self.rev(node)

504

self.rev(node)

494

return True

505

return True

495

except KeyError:

506

except KeyError:

496

return False

507

return False

497

508

498

def clearcaches(self):

509

def clearcaches(self):

499

self._cache = None

510

self._cache = None

500

self._chainbasecache.clear()

511

self._chainbasecache.clear()

501

self._chunkcache = (0, '')

512

self._chunkcache = (0, '')

502

self._pcache = {}

513

self._pcache = {}

503

514

504

try:

515

try:

505

self._nodecache.clearcaches()

516

self._nodecache.clearcaches()

506

except AttributeError:

517

except AttributeError:

507

self._nodecache = {nullid: nullrev}

518

self._nodecache = {nullid: nullrev}

508

self._nodepos = None

519

self._nodepos = None

509

520

510

def rev(self, node):

521

def rev(self, node):

511

try:

522

try:

512

return self._nodecache[node]

523

return self._nodecache[node]

513

except TypeError:

524

except TypeError:

514

raise

525

raise

515

except RevlogError:

526

except RevlogError:

516

# parsers.c radix tree lookup failed

527

# parsers.c radix tree lookup failed

517

if node == wdirid:

528

if node == wdirid:

518

raise error.WdirUnsupported

529

raise error.WdirUnsupported

519

raise LookupError(node, self.indexfile, _('no node'))

530

raise LookupError(node, self.indexfile, _('no node'))

520

except KeyError:

531

except KeyError:

521

# pure python cache lookup failed

532

# pure python cache lookup failed

522

n = self._nodecache

533

n = self._nodecache

523

i = self.index

534

i = self.index

524

p = self._nodepos

535

p = self._nodepos

525

if p is None:

536

if p is None:

526

p = len(i) - 2

537

p = len(i) - 2

527

for r in xrange(p, -1, -1):

538

for r in xrange(p, -1, -1):

528

v = i[r][7]

539

v = i[r][7]

529

n[v] = r

540

n[v] = r

530

if v == node:

541

if v == node:

531

self._nodepos = r - 1

542

self._nodepos = r - 1

532

return r

543

return r

533

if node == wdirid:

544

if node == wdirid:

534

raise error.WdirUnsupported

545

raise error.WdirUnsupported

535

raise LookupError(node, self.indexfile, _('no node'))

546

raise LookupError(node, self.indexfile, _('no node'))

536

547

537

# Accessors for index entries.

548

# Accessors for index entries.

538

549

539

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

550

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

540

# are flags.

551

# are flags.

541

def start(self, rev):

552

def start(self, rev):

542

return int(self.index[rev][0] >> 16)

553

return int(self.index[rev][0] >> 16)

543

554

544

def flags(self, rev):

555

def flags(self, rev):

545

return self.index[rev][0] & 0xFFFF

556

return self.index[rev][0] & 0xFFFF

546

557

547

def length(self, rev):

558

def length(self, rev):

548

return self.index[rev][1]

559

return self.index[rev][1]

549

560

550

def rawsize(self, rev):

561

def rawsize(self, rev):

551

"""return the length of the uncompressed text for a given revision"""

562

"""return the length of the uncompressed text for a given revision"""

552

l = self.index[rev][2]

563

l = self.index[rev][2]

553

if l >= 0:

564

if l >= 0:

554

return l

565

return l

555

566

556

t = self.revision(rev, raw=True)

567

t = self.revision(rev, raw=True)

557

return len(t)

568

return len(t)

558

569

559

def size(self, rev):

570

def size(self, rev):

560

"""length of non-raw text (processed by a "read" flag processor)"""

571

"""length of non-raw text (processed by a "read" flag processor)"""

561

# fast path: if no "read" flag processor could change the content,

572

# fast path: if no "read" flag processor could change the content,

562

# size is rawsize. note: ELLIPSIS is known to not change the content.

573

# size is rawsize. note: ELLIPSIS is known to not change the content.

563

flags = self.flags(rev)

574

flags = self.flags(rev)

564

if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:

575

if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:

565

return self.rawsize(rev)

576

return self.rawsize(rev)

566

577

567

return len(self.revision(rev, raw=False))

578

return len(self.revision(rev, raw=False))

568

579

569

def chainbase(self, rev):

580

def chainbase(self, rev):

570

base = self._chainbasecache.get(rev)

581

base = self._chainbasecache.get(rev)

571

if base is not None:

582

if base is not None:

572

return base

583

return base

573

584

574

index = self.index

585

index = self.index

575

base = index[rev][3]

586

base = index[rev][3]

576

while base != rev:

587

while base != rev:

577

rev = base

588

rev = base

578

base = index[rev][3]

589

base = index[rev][3]

579

590

580

self._chainbasecache[rev] = base

591

self._chainbasecache[rev] = base

581

return base

592

return base

582

593

583

def linkrev(self, rev):

594

def linkrev(self, rev):

584

return self.index[rev][4]

595

return self.index[rev][4]

585

596

586

def parentrevs(self, rev):

597

def parentrevs(self, rev):

587

try:

598

try:

588

return self.index[rev][5:7]

599

return self.index[rev][5:7]

589

except IndexError:

600

except IndexError:

590

if rev == wdirrev:

601

if rev == wdirrev:

591

raise error.WdirUnsupported

602

raise error.WdirUnsupported

592

raise

603

raise

593

604

594

def node(self, rev):

605

def node(self, rev):

595

try:

606

try:

596

return self.index[rev][7]

607

return self.index[rev][7]

597

except IndexError:

608

except IndexError:

598

if rev == wdirrev:

609

if rev == wdirrev:

599

raise error.WdirUnsupported

610

raise error.WdirUnsupported

600

raise

611

raise

601

612

602

# Derived from index values.

613

# Derived from index values.

603

614

604

def end(self, rev):

615

def end(self, rev):

605

return self.start(rev) + self.length(rev)

616

return self.start(rev) + self.length(rev)

606

617

607

def parents(self, node):

618

def parents(self, node):

608

i = self.index

619

i = self.index

609

d = i[self.rev(node)]

620

d = i[self.rev(node)]

610

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

621

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

611

622

612

def chainlen(self, rev):

623

def chainlen(self, rev):

613

return self._chaininfo(rev)[0]

624

return self._chaininfo(rev)[0]

614

625

615

def _chaininfo(self, rev):

626

def _chaininfo(self, rev):

616

chaininfocache = self._chaininfocache

627

chaininfocache = self._chaininfocache

617

if rev in chaininfocache:

628

if rev in chaininfocache:

618

return chaininfocache[rev]

629

return chaininfocache[rev]

619

index = self.index

630

index = self.index

620

generaldelta = self._generaldelta

631

generaldelta = self._generaldelta

621

iterrev = rev

632

iterrev = rev

622

e = index[iterrev]

633

e = index[iterrev]

623

clen = 0

634

clen = 0

624

compresseddeltalen = 0

635

compresseddeltalen = 0

625

while iterrev != e[3]:

636

while iterrev != e[3]:

626

clen += 1

637

clen += 1

627

compresseddeltalen += e[1]

638

compresseddeltalen += e[1]

628

if generaldelta:

639

if generaldelta:

629

iterrev = e[3]

640

iterrev = e[3]

630

else:

641

else:

631

iterrev -= 1

642

iterrev -= 1

632

if iterrev in chaininfocache:

643

if iterrev in chaininfocache:

633

t = chaininfocache[iterrev]

644

t = chaininfocache[iterrev]

634

clen += t[0]

645

clen += t[0]

635

compresseddeltalen += t[1]

646

compresseddeltalen += t[1]

636

break

647

break

637

e = index[iterrev]

648

e = index[iterrev]

638

else:

649

else:

639

# Add text length of base since decompressing that also takes

650

# Add text length of base since decompressing that also takes

640

# work. For cache hits the length is already included.

651

# work. For cache hits the length is already included.

641

compresseddeltalen += e[1]

652

compresseddeltalen += e[1]

642

r = (clen, compresseddeltalen)

653

r = (clen, compresseddeltalen)

643

chaininfocache[rev] = r

654

chaininfocache[rev] = r

644

return r

655

return r

645

656

646

def _deltachain(self, rev, stoprev=None):

657

def _deltachain(self, rev, stoprev=None):

647

"""Obtain the delta chain for a revision.

658

"""Obtain the delta chain for a revision.

648

659

649

``stoprev`` specifies a revision to stop at. If not specified, we

660

``stoprev`` specifies a revision to stop at. If not specified, we

650

stop at the base of the chain.

661

stop at the base of the chain.

651

662

652

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

663

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

653

revs in ascending order and ``stopped`` is a bool indicating whether

664

revs in ascending order and ``stopped`` is a bool indicating whether

654

``stoprev`` was hit.

665

``stoprev`` was hit.

655

"""

666

"""

656

# Try C implementation.

667

# Try C implementation.

657

try:

668

try:

658

return self.index.deltachain(rev, stoprev, self._generaldelta)

669

return self.index.deltachain(rev, stoprev, self._generaldelta)

659

except AttributeError:

670

except AttributeError:

660

pass

671

pass

661

672

662

chain = []

673

chain = []

663

674

664

# Alias to prevent attribute lookup in tight loop.

675

# Alias to prevent attribute lookup in tight loop.

665

index = self.index

676

index = self.index

666

generaldelta = self._generaldelta

677

generaldelta = self._generaldelta

667

678

668

iterrev = rev

679

iterrev = rev

669

e = index[iterrev]

680

e = index[iterrev]

670

while iterrev != e[3] and iterrev != stoprev:

681

while iterrev != e[3] and iterrev != stoprev:

671

chain.append(iterrev)

682

chain.append(iterrev)

672

if generaldelta:

683

if generaldelta:

673

iterrev = e[3]

684

iterrev = e[3]

674

else:

685

else:

675

iterrev -= 1

686

iterrev -= 1

676

e = index[iterrev]

687

e = index[iterrev]

677

688

678

if iterrev == stoprev:

689

if iterrev == stoprev:

679

stopped = True

690

stopped = True

680

else:

691

else:

681

chain.append(iterrev)

692

chain.append(iterrev)

682

stopped = False

693

stopped = False

683

694

684

chain.reverse()

695

chain.reverse()

685

return chain, stopped

696

return chain, stopped

686

697

687

def ancestors(self, revs, stoprev=0, inclusive=False):

698

def ancestors(self, revs, stoprev=0, inclusive=False):

688

"""Generate the ancestors of 'revs' in reverse topological order.

699

"""Generate the ancestors of 'revs' in reverse topological order.

689

Does not generate revs lower than stoprev.

700

Does not generate revs lower than stoprev.

690

701

691

See the documentation for ancestor.lazyancestors for more details."""

702

See the documentation for ancestor.lazyancestors for more details."""

692

703

693

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

704

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

694

inclusive=inclusive)

705

inclusive=inclusive)

695

706

696

def descendants(self, revs):

707

def descendants(self, revs):

697

"""Generate the descendants of 'revs' in revision order.

708

"""Generate the descendants of 'revs' in revision order.

698

709

699

Yield a sequence of revision numbers starting with a child of

710

Yield a sequence of revision numbers starting with a child of

700

some rev in revs, i.e., each revision is *not* considered a

711

some rev in revs, i.e., each revision is *not* considered a

701

descendant of itself. Results are ordered by revision number (a

712

descendant of itself. Results are ordered by revision number (a

702

topological sort)."""

713

topological sort)."""

703

first = min(revs)

714

first = min(revs)

704

if first == nullrev:

715

if first == nullrev:

705

for i in self:

716

for i in self:

706

yield i

717

yield i

707

return

718

return

708

719

709

seen = set(revs)

720

seen = set(revs)

710

for i in self.revs(start=first + 1):

721

for i in self.revs(start=first + 1):

711

for x in self.parentrevs(i):

722

for x in self.parentrevs(i):

712

if x != nullrev and x in seen:

723

if x != nullrev and x in seen:

713

seen.add(i)

724

seen.add(i)

714

yield i

725

yield i

715

break

726

break

716

727

717

def findcommonmissing(self, common=None, heads=None):

728

def findcommonmissing(self, common=None, heads=None):

718

"""Return a tuple of the ancestors of common and the ancestors of heads

729

"""Return a tuple of the ancestors of common and the ancestors of heads

719

that are not ancestors of common. In revset terminology, we return the

730

that are not ancestors of common. In revset terminology, we return the

720

tuple:

731

tuple:

721

732

722

::common, (::heads) - (::common)

733

::common, (::heads) - (::common)

723

734

724

The list is sorted by revision number, meaning it is

735

The list is sorted by revision number, meaning it is

725

topologically sorted.

736

topologically sorted.

726

737

727

'heads' and 'common' are both lists of node IDs. If heads is

738

'heads' and 'common' are both lists of node IDs. If heads is

728

not supplied, uses all of the revlog's heads. If common is not

739

not supplied, uses all of the revlog's heads. If common is not

729

supplied, uses nullid."""

740

supplied, uses nullid."""

730

if common is None:

741

if common is None:

731

common = [nullid]

742

common = [nullid]

732

if heads is None:

743

if heads is None:

733

heads = self.heads()

744

heads = self.heads()

734

745

735

common = [self.rev(n) for n in common]

746

common = [self.rev(n) for n in common]

736

heads = [self.rev(n) for n in heads]

747

heads = [self.rev(n) for n in heads]

737

748

738

# we want the ancestors, but inclusive

749

# we want the ancestors, but inclusive

739

class lazyset(object):

750

class lazyset(object):

740

def __init__(self, lazyvalues):

751

def __init__(self, lazyvalues):

741

self.addedvalues = set()

752

self.addedvalues = set()

742

self.lazyvalues = lazyvalues

753

self.lazyvalues = lazyvalues

743

754

744

def __contains__(self, value):

755

def __contains__(self, value):

745

return value in self.addedvalues or value in self.lazyvalues

756

return value in self.addedvalues or value in self.lazyvalues

746

757

747

def __iter__(self):

758

def __iter__(self):

748

added = self.addedvalues

759

added = self.addedvalues

749

for r in added:

760

for r in added:

750

yield r

761

yield r

751

for r in self.lazyvalues:

762

for r in self.lazyvalues:

752

if not r in added:

763

if not r in added:

753

yield r

764

yield r

754

765

755

def add(self, value):

766

def add(self, value):

756

self.addedvalues.add(value)

767

self.addedvalues.add(value)

757

768

758

def update(self, values):

769

def update(self, values):

759

self.addedvalues.update(values)

770

self.addedvalues.update(values)

760

771

761

has = lazyset(self.ancestors(common))

772

has = lazyset(self.ancestors(common))

762

has.add(nullrev)

773

has.add(nullrev)

763

has.update(common)

774

has.update(common)

764

775

765

# take all ancestors from heads that aren't in has

776

# take all ancestors from heads that aren't in has

766

missing = set()

777

missing = set()

767

visit = collections.deque(r for r in heads if r not in has)

778

visit = collections.deque(r for r in heads if r not in has)

768

while visit:

779

while visit:

769

r = visit.popleft()

780

r = visit.popleft()

770

if r in missing:

781

if r in missing:

771

continue

782

continue

772

else:

783

else:

773

missing.add(r)

784

missing.add(r)

774

for p in self.parentrevs(r):

785

for p in self.parentrevs(r):

775

if p not in has:

786

if p not in has:

776

visit.append(p)

787

visit.append(p)

777

missing = list(missing)

788

missing = list(missing)

778

missing.sort()

789

missing.sort()

779

return has, [self.node(miss) for miss in missing]

790

return has, [self.node(miss) for miss in missing]

780

791

781

def incrementalmissingrevs(self, common=None):

792

def incrementalmissingrevs(self, common=None):

782

"""Return an object that can be used to incrementally compute the

793

"""Return an object that can be used to incrementally compute the

783

revision numbers of the ancestors of arbitrary sets that are not

794

revision numbers of the ancestors of arbitrary sets that are not

784

ancestors of common. This is an ancestor.incrementalmissingancestors

795

ancestors of common. This is an ancestor.incrementalmissingancestors

785

object.

796

object.

786

797

787

'common' is a list of revision numbers. If common is not supplied, uses

798

'common' is a list of revision numbers. If common is not supplied, uses

788

nullrev.

799

nullrev.

789

"""

800

"""

790

if common is None:

801

if common is None:

791

common = [nullrev]

802

common = [nullrev]

792

803

793

return ancestor.incrementalmissingancestors(self.parentrevs, common)

804

return ancestor.incrementalmissingancestors(self.parentrevs, common)

794

805

795

def findmissingrevs(self, common=None, heads=None):

806

def findmissingrevs(self, common=None, heads=None):

796

"""Return the revision numbers of the ancestors of heads that

807

"""Return the revision numbers of the ancestors of heads that

797

are not ancestors of common.

808

are not ancestors of common.

798

809

799

More specifically, return a list of revision numbers corresponding to

810

More specifically, return a list of revision numbers corresponding to

800

nodes N such that every N satisfies the following constraints:

811

nodes N such that every N satisfies the following constraints:

801

812

802

1. N is an ancestor of some node in 'heads'

813

1. N is an ancestor of some node in 'heads'

803

2. N is not an ancestor of any node in 'common'

814

2. N is not an ancestor of any node in 'common'

804

815

805

The list is sorted by revision number, meaning it is

816

The list is sorted by revision number, meaning it is

806

topologically sorted.

817

topologically sorted.

807

818

808

'heads' and 'common' are both lists of revision numbers. If heads is

819

'heads' and 'common' are both lists of revision numbers. If heads is

809

not supplied, uses all of the revlog's heads. If common is not

820

not supplied, uses all of the revlog's heads. If common is not

810

supplied, uses nullid."""

821

supplied, uses nullid."""

811

if common is None:

822

if common is None:

812

common = [nullrev]

823

common = [nullrev]

813

if heads is None:

824

if heads is None:

814

heads = self.headrevs()

825

heads = self.headrevs()

815

826

816

inc = self.incrementalmissingrevs(common=common)

827

inc = self.incrementalmissingrevs(common=common)

817

return inc.missingancestors(heads)

828

return inc.missingancestors(heads)

818

829

819

def findmissing(self, common=None, heads=None):

830

def findmissing(self, common=None, heads=None):

820

"""Return the ancestors of heads that are not ancestors of common.

831

"""Return the ancestors of heads that are not ancestors of common.

821

832

822

More specifically, return a list of nodes N such that every N

833

More specifically, return a list of nodes N such that every N

823

satisfies the following constraints:

834

satisfies the following constraints:

824

835

825

1. N is an ancestor of some node in 'heads'

836

1. N is an ancestor of some node in 'heads'

826

2. N is not an ancestor of any node in 'common'

837

2. N is not an ancestor of any node in 'common'

827

838

828

The list is sorted by revision number, meaning it is

839

The list is sorted by revision number, meaning it is

829

topologically sorted.

840

topologically sorted.

830

841

831

'heads' and 'common' are both lists of node IDs. If heads is

842

'heads' and 'common' are both lists of node IDs. If heads is

832

not supplied, uses all of the revlog's heads. If common is not

843

not supplied, uses all of the revlog's heads. If common is not

833

supplied, uses nullid."""

844

supplied, uses nullid."""

834

if common is None:

845

if common is None:

835

common = [nullid]

846

common = [nullid]

836

if heads is None:

847

if heads is None:

837

heads = self.heads()

848

heads = self.heads()

838

849

839

common = [self.rev(n) for n in common]

850

common = [self.rev(n) for n in common]

840

heads = [self.rev(n) for n in heads]

851

heads = [self.rev(n) for n in heads]

841

852

842

inc = self.incrementalmissingrevs(common=common)

853

inc = self.incrementalmissingrevs(common=common)

843

return [self.node(r) for r in inc.missingancestors(heads)]

854

return [self.node(r) for r in inc.missingancestors(heads)]

844

855

845

def nodesbetween(self, roots=None, heads=None):

856

def nodesbetween(self, roots=None, heads=None):

846

"""Return a topological path from 'roots' to 'heads'.

857

"""Return a topological path from 'roots' to 'heads'.

847

858

848

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

859

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

849

topologically sorted list of all nodes N that satisfy both of

860

topologically sorted list of all nodes N that satisfy both of

850

these constraints:

861

these constraints:

851

862

852

1. N is a descendant of some node in 'roots'

863

1. N is a descendant of some node in 'roots'

853

2. N is an ancestor of some node in 'heads'

864

2. N is an ancestor of some node in 'heads'

854

865

855

Every node is considered to be both a descendant and an ancestor

866

Every node is considered to be both a descendant and an ancestor

856

of itself, so every reachable node in 'roots' and 'heads' will be

867

of itself, so every reachable node in 'roots' and 'heads' will be

857

included in 'nodes'.

868

included in 'nodes'.

858

869

859

'outroots' is the list of reachable nodes in 'roots', i.e., the

870

'outroots' is the list of reachable nodes in 'roots', i.e., the

860

subset of 'roots' that is returned in 'nodes'. Likewise,

871

subset of 'roots' that is returned in 'nodes'. Likewise,

861

'outheads' is the subset of 'heads' that is also in 'nodes'.

872

'outheads' is the subset of 'heads' that is also in 'nodes'.

862

873

863

'roots' and 'heads' are both lists of node IDs. If 'roots' is

874

'roots' and 'heads' are both lists of node IDs. If 'roots' is

864

unspecified, uses nullid as the only root. If 'heads' is

875

unspecified, uses nullid as the only root. If 'heads' is

865

unspecified, uses list of all of the revlog's heads."""

876

unspecified, uses list of all of the revlog's heads."""

866

nonodes = ([], [], [])

877

nonodes = ([], [], [])

867

if roots is not None:

878

if roots is not None:

868

roots = list(roots)

879

roots = list(roots)

869

if not roots:

880

if not roots:

870

return nonodes

881

return nonodes

871

lowestrev = min([self.rev(n) for n in roots])

882

lowestrev = min([self.rev(n) for n in roots])

872

else:

883

else:

873

roots = [nullid] # Everybody's a descendant of nullid

884

roots = [nullid] # Everybody's a descendant of nullid

874

lowestrev = nullrev

885

lowestrev = nullrev

875

if (lowestrev == nullrev) and (heads is None):

886

if (lowestrev == nullrev) and (heads is None):

876

# We want _all_ the nodes!

887

# We want _all_ the nodes!

877

return ([self.node(r) for r in self], [nullid], list(self.heads()))

888

return ([self.node(r) for r in self], [nullid], list(self.heads()))

878

if heads is None:

889

if heads is None:

879

# All nodes are ancestors, so the latest ancestor is the last

890

# All nodes are ancestors, so the latest ancestor is the last

880

# node.

891

# node.

881

highestrev = len(self) - 1

892

highestrev = len(self) - 1

882

# Set ancestors to None to signal that every node is an ancestor.

893

# Set ancestors to None to signal that every node is an ancestor.

883

ancestors = None

894

ancestors = None

884

# Set heads to an empty dictionary for later discovery of heads

895

# Set heads to an empty dictionary for later discovery of heads

885

heads = {}

896

heads = {}

886

else:

897

else:

887

heads = list(heads)

898

heads = list(heads)

888

if not heads:

899

if not heads:

889

return nonodes

900

return nonodes

890

ancestors = set()

901

ancestors = set()

891

# Turn heads into a dictionary so we can remove 'fake' heads.

902

# Turn heads into a dictionary so we can remove 'fake' heads.

892

# Also, later we will be using it to filter out the heads we can't

903

# Also, later we will be using it to filter out the heads we can't

893

# find from roots.

904

# find from roots.

894

heads = dict.fromkeys(heads, False)

905

heads = dict.fromkeys(heads, False)

895

# Start at the top and keep marking parents until we're done.

906

# Start at the top and keep marking parents until we're done.

896

nodestotag = set(heads)

907

nodestotag = set(heads)

897

# Remember where the top was so we can use it as a limit later.

908

# Remember where the top was so we can use it as a limit later.

898

highestrev = max([self.rev(n) for n in nodestotag])

909

highestrev = max([self.rev(n) for n in nodestotag])

899

while nodestotag:

910

while nodestotag:

900

# grab a node to tag

911

# grab a node to tag

901

n = nodestotag.pop()

912

n = nodestotag.pop()

902

# Never tag nullid

913

# Never tag nullid

903

if n == nullid:

914

if n == nullid:

904

continue

915

continue

905

# A node's revision number represents its place in a

916

# A node's revision number represents its place in a

906

# topologically sorted list of nodes.

917

# topologically sorted list of nodes.

907

r = self.rev(n)

918

r = self.rev(n)

908

if r >= lowestrev:

919

if r >= lowestrev:

909

if n not in ancestors:

920

if n not in ancestors:

910

# If we are possibly a descendant of one of the roots

921

# If we are possibly a descendant of one of the roots

911

# and we haven't already been marked as an ancestor

922

# and we haven't already been marked as an ancestor

912

ancestors.add(n) # Mark as ancestor

923

ancestors.add(n) # Mark as ancestor

913

# Add non-nullid parents to list of nodes to tag.

924

# Add non-nullid parents to list of nodes to tag.

914

nodestotag.update([p for p in self.parents(n) if

925

nodestotag.update([p for p in self.parents(n) if

915

p != nullid])

926

p != nullid])

916

elif n in heads: # We've seen it before, is it a fake head?

927

elif n in heads: # We've seen it before, is it a fake head?

917

# So it is, real heads should not be the ancestors of

928

# So it is, real heads should not be the ancestors of

918

# any other heads.

929

# any other heads.

919

heads.pop(n)

930

heads.pop(n)

920

if not ancestors:

931

if not ancestors:

921

return nonodes

932

return nonodes

922

# Now that we have our set of ancestors, we want to remove any

933

# Now that we have our set of ancestors, we want to remove any

923

# roots that are not ancestors.

934

# roots that are not ancestors.

924

935

925

# If one of the roots was nullid, everything is included anyway.

936

# If one of the roots was nullid, everything is included anyway.

926

if lowestrev > nullrev:

937

if lowestrev > nullrev:

927

# But, since we weren't, let's recompute the lowest rev to not

938

# But, since we weren't, let's recompute the lowest rev to not

928

# include roots that aren't ancestors.

939

# include roots that aren't ancestors.

929

940

930

# Filter out roots that aren't ancestors of heads

941

# Filter out roots that aren't ancestors of heads

931

roots = [root for root in roots if root in ancestors]

942

roots = [root for root in roots if root in ancestors]

932

# Recompute the lowest revision

943

# Recompute the lowest revision

933

if roots:

944

if roots:

934

lowestrev = min([self.rev(root) for root in roots])

945

lowestrev = min([self.rev(root) for root in roots])

935

else:

946

else:

936

# No more roots? Return empty list

947

# No more roots? Return empty list

937

return nonodes

948

return nonodes

938

else:

949

else:

939

# We are descending from nullid, and don't need to care about

950

# We are descending from nullid, and don't need to care about

940

# any other roots.

951

# any other roots.

941

lowestrev = nullrev

952

lowestrev = nullrev

942

roots = [nullid]

953

roots = [nullid]

943

# Transform our roots list into a set.

954

# Transform our roots list into a set.

944

descendants = set(roots)

955

descendants = set(roots)

945

# Also, keep the original roots so we can filter out roots that aren't

956

# Also, keep the original roots so we can filter out roots that aren't

946

# 'real' roots (i.e. are descended from other roots).

957

# 'real' roots (i.e. are descended from other roots).

947

roots = descendants.copy()

958

roots = descendants.copy()

948

# Our topologically sorted list of output nodes.

959

# Our topologically sorted list of output nodes.

949

orderedout = []

960

orderedout = []

950

# Don't start at nullid since we don't want nullid in our output list,

961

# Don't start at nullid since we don't want nullid in our output list,

951

# and if nullid shows up in descendants, empty parents will look like

962

# and if nullid shows up in descendants, empty parents will look like

952

# they're descendants.

963

# they're descendants.

953

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

964

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

954

n = self.node(r)

965

n = self.node(r)

955

isdescendant = False

966

isdescendant = False

956

if lowestrev == nullrev: # Everybody is a descendant of nullid

967

if lowestrev == nullrev: # Everybody is a descendant of nullid

957

isdescendant = True

968

isdescendant = True

958

elif n in descendants:

969

elif n in descendants:

959

# n is already a descendant

970

# n is already a descendant

960

isdescendant = True

971

isdescendant = True

961

# This check only needs to be done here because all the roots

972

# This check only needs to be done here because all the roots

962

# will start being marked is descendants before the loop.

973

# will start being marked is descendants before the loop.

963

if n in roots:

974

if n in roots:

964

# If n was a root, check if it's a 'real' root.

975

# If n was a root, check if it's a 'real' root.

965

p = tuple(self.parents(n))

976

p = tuple(self.parents(n))

966

# If any of its parents are descendants, it's not a root.

977

# If any of its parents are descendants, it's not a root.

967

if (p[0] in descendants) or (p[1] in descendants):

978

if (p[0] in descendants) or (p[1] in descendants):

968

roots.remove(n)

979

roots.remove(n)

969

else:

980

else:

970

p = tuple(self.parents(n))

981

p = tuple(self.parents(n))

971

# A node is a descendant if either of its parents are

982

# A node is a descendant if either of its parents are

972

# descendants. (We seeded the dependents list with the roots

983

# descendants. (We seeded the dependents list with the roots

973

# up there, remember?)

984

# up there, remember?)

974

if (p[0] in descendants) or (p[1] in descendants):

985

if (p[0] in descendants) or (p[1] in descendants):

975

descendants.add(n)

986

descendants.add(n)

976

isdescendant = True

987

isdescendant = True

977

if isdescendant and ((ancestors is None) or (n in ancestors)):

988

if isdescendant and ((ancestors is None) or (n in ancestors)):

978

# Only include nodes that are both descendants and ancestors.

989

# Only include nodes that are both descendants and ancestors.

979

orderedout.append(n)

990

orderedout.append(n)

980

if (ancestors is not None) and (n in heads):

991

if (ancestors is not None) and (n in heads):

981

# We're trying to figure out which heads are reachable

992

# We're trying to figure out which heads are reachable

982

# from roots.

993

# from roots.

983

# Mark this head as having been reached

994

# Mark this head as having been reached

984

heads[n] = True

995

heads[n] = True

985

elif ancestors is None:

996

elif ancestors is None:

986

# Otherwise, we're trying to discover the heads.

997

# Otherwise, we're trying to discover the heads.

987

# Assume this is a head because if it isn't, the next step

998

# Assume this is a head because if it isn't, the next step

988

# will eventually remove it.

999

# will eventually remove it.

989

heads[n] = True

1000

heads[n] = True

990

# But, obviously its parents aren't.

1001

# But, obviously its parents aren't.

991

for p in self.parents(n):

1002

for p in self.parents(n):

992

heads.pop(p, None)

1003

heads.pop(p, None)

993

heads = [head for head, flag in heads.iteritems() if flag]

1004

heads = [head for head, flag in heads.iteritems() if flag]

994

roots = list(roots)

1005

roots = list(roots)

995

assert orderedout

1006

assert orderedout

996

assert roots

1007

assert roots

997

assert heads

1008

assert heads

998

return (orderedout, roots, heads)

1009

return (orderedout, roots, heads)

999

1010

1000

def headrevs(self):

1011

def headrevs(self):

1001

try:

1012

try:

1002

return self.index.headrevs()

1013

return self.index.headrevs()

1003

except AttributeError:

1014

except AttributeError:

1004

return self._headrevs()

1015

return self._headrevs()

1005

1016

1006

def computephases(self, roots):

1017

def computephases(self, roots):

1007

return self.index.computephasesmapsets(roots)

1018

return self.index.computephasesmapsets(roots)

1008

1019

1009

def _headrevs(self):

1020

def _headrevs(self):

1010

count = len(self)

1021

count = len(self)

1011

if not count:

1022

if not count:

1012

return [nullrev]

1023

return [nullrev]

1013

# we won't iter over filtered rev so nobody is a head at start

1024

# we won't iter over filtered rev so nobody is a head at start

1014

ishead = [0] * (count + 1)

1025

ishead = [0] * (count + 1)

1015

index = self.index

1026

index = self.index

1016

for r in self:

1027

for r in self:

1017

ishead[r] = 1 # I may be an head

1028

ishead[r] = 1 # I may be an head

1018

e = index[r]

1029

e = index[r]

1019

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1030

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

1020

return [r for r, val in enumerate(ishead) if val]

1031

return [r for r, val in enumerate(ishead) if val]

1021

1032

1022

def heads(self, start=None, stop=None):

1033

def heads(self, start=None, stop=None):

1023

"""return the list of all nodes that have no children

1034

"""return the list of all nodes that have no children

1024

1035

1025

if start is specified, only heads that are descendants of

1036

if start is specified, only heads that are descendants of

1026

start will be returned

1037

start will be returned

1027

if stop is specified, it will consider all the revs from stop

1038

if stop is specified, it will consider all the revs from stop

1028

as if they had no children

1039

as if they had no children

1029

"""

1040

"""

1030

if start is None and stop is None:

1041

if start is None and stop is None:

1031

if not len(self):

1042

if not len(self):

1032

return [nullid]

1043

return [nullid]

1033

return [self.node(r) for r in self.headrevs()]

1044

return [self.node(r) for r in self.headrevs()]

1034

1045

1035

if start is None:

1046

if start is None:

1036

start = nullid

1047

start = nullid

1037

if stop is None:

1048

if stop is None:

1038

stop = []

1049

stop = []

1039

stoprevs = set([self.rev(n) for n in stop])

1050

stoprevs = set([self.rev(n) for n in stop])

1040

startrev = self.rev(start)

1051

startrev = self.rev(start)

1041

reachable = {startrev}

1052

reachable = {startrev}

1042

heads = {startrev}

1053

heads = {startrev}

1043

1054

1044

parentrevs = self.parentrevs

1055

parentrevs = self.parentrevs

1045

for r in self.revs(start=startrev + 1):

1056

for r in self.revs(start=startrev + 1):

1046

for p in parentrevs(r):

1057

for p in parentrevs(r):

1047

if p in reachable:

1058

if p in reachable:

1048

if r not in stoprevs:

1059

if r not in stoprevs:

1049

reachable.add(r)

1060

reachable.add(r)

1050

heads.add(r)

1061

heads.add(r)

1051

if p in heads and p not in stoprevs:

1062

if p in heads and p not in stoprevs:

1052

heads.remove(p)

1063

heads.remove(p)

1053

1064

1054

return [self.node(r) for r in heads]

1065

return [self.node(r) for r in heads]

1055

1066

1056

def children(self, node):

1067

def children(self, node):

1057

"""find the children of a given node"""

1068

"""find the children of a given node"""

1058

c = []

1069

c = []

1059

p = self.rev(node)

1070

p = self.rev(node)

1060

for r in self.revs(start=p + 1):

1071

for r in self.revs(start=p + 1):

1061

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

1072

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

1062

if prevs:

1073

if prevs:

1063

for pr in prevs:

1074

for pr in prevs:

1064

if pr == p:

1075

if pr == p:

1065

c.append(self.node(r))

1076

c.append(self.node(r))

1066

elif p == nullrev:

1077

elif p == nullrev:

1067

c.append(self.node(r))

1078

c.append(self.node(r))

1068

return c

1079

return c

1069

1080

1070

def descendant(self, start, end):

1081

def descendant(self, start, end):

1071

if start == nullrev:

1082

if start == nullrev:

1072

return True

1083

return True

1073

for i in self.descendants([start]):

1084

for i in self.descendants([start]):

1074

if i == end:

1085

if i == end:

1075

return True

1086

return True

1076

elif i > end:

1087

elif i > end:

1077

break

1088

break

1078

return False

1089

return False

1079

1090

1080

def commonancestorsheads(self, a, b):

1091

def commonancestorsheads(self, a, b):

1081

"""calculate all the heads of the common ancestors of nodes a and b"""

1092

"""calculate all the heads of the common ancestors of nodes a and b"""

1082

a, b = self.rev(a), self.rev(b)

1093

a, b = self.rev(a), self.rev(b)

1083

try:

1094

try:

1084

ancs = self.index.commonancestorsheads(a, b)

1095

ancs = self.index.commonancestorsheads(a, b)

1085

except (AttributeError, OverflowError): # C implementation failed

1096

except (AttributeError, OverflowError): # C implementation failed

1086

ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)

1097

ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)

1087

return pycompat.maplist(self.node, ancs)

1098

return pycompat.maplist(self.node, ancs)

1088

1099

1089

def isancestor(self, a, b):

1100

def isancestor(self, a, b):

1090

"""return True if node a is an ancestor of node b

1101

"""return True if node a is an ancestor of node b

1091

1102

1092

The implementation of this is trivial but the use of

1103

The implementation of this is trivial but the use of

1093

commonancestorsheads is not."""

1104

commonancestorsheads is not."""

1094

return a in self.commonancestorsheads(a, b)

1105

return a in self.commonancestorsheads(a, b)

1095

1106

1096

def ancestor(self, a, b):

1107

def ancestor(self, a, b):

1097

"""calculate the "best" common ancestor of nodes a and b"""

1108

"""calculate the "best" common ancestor of nodes a and b"""

1098

1109

1099

a, b = self.rev(a), self.rev(b)

1110

a, b = self.rev(a), self.rev(b)

1100

try:

1111

try:

1101

ancs = self.index.ancestors(a, b)

1112

ancs = self.index.ancestors(a, b)

1102

except (AttributeError, OverflowError):

1113

except (AttributeError, OverflowError):

1103

ancs = ancestor.ancestors(self.parentrevs, a, b)

1114

ancs = ancestor.ancestors(self.parentrevs, a, b)

1104

if ancs:

1115

if ancs:

1105

# choose a consistent winner when there's a tie

1116

# choose a consistent winner when there's a tie

1106

return min(map(self.node, ancs))

1117

return min(map(self.node, ancs))

1107

return nullid

1118

return nullid

1108

1119

1109

def _match(self, id):

1120

def _match(self, id):

1110

if isinstance(id, int):

1121

if isinstance(id, int):

1111

# rev

1122

# rev

1112

return self.node(id)

1123

return self.node(id)

1113

if len(id) == 20:

1124

if len(id) == 20:

1114

# possibly a binary node

1125

# possibly a binary node

1115

# odds of a binary node being all hex in ASCII are 1 in 10**25

1126

# odds of a binary node being all hex in ASCII are 1 in 10**25

1116

try:

1127

try:

1117

node = id

1128

node = id

1118

self.rev(node) # quick search the index

1129

self.rev(node) # quick search the index

1119

return node

1130

return node

1120

except LookupError:

1131

except LookupError:

1121

pass # may be partial hex id

1132

pass # may be partial hex id

1122

try:

1133

try:

1123

# str(rev)

1134

# str(rev)

1124

rev = int(id)

1135

rev = int(id)

1125

if str(rev) != id:

1136

if str(rev) != id:

1126

raise ValueError

1137

raise ValueError

1127

if rev < 0:

1138

if rev < 0:

1128

rev = len(self) + rev

1139

rev = len(self) + rev

1129

if rev < 0 or rev >= len(self):

1140

if rev < 0 or rev >= len(self):

1130

raise ValueError

1141

raise ValueError

1131

return self.node(rev)

1142

return self.node(rev)

1132

except (ValueError, OverflowError):

1143

except (ValueError, OverflowError):

1133

pass

1144

pass

1134

if len(id) == 40:

1145

if len(id) == 40:

1135

try:

1146

try:

1136

# a full hex nodeid?

1147

# a full hex nodeid?

1137

node = bin(id)

1148

node = bin(id)

1138

self.rev(node)

1149

self.rev(node)

1139

return node

1150

return node

1140

except (TypeError, LookupError):

1151

except (TypeError, LookupError):

1141

pass

1152

pass

1142

1153

1143

def _partialmatch(self, id):

1154

def _partialmatch(self, id):

1144

maybewdir = wdirhex.startswith(id)

1155

maybewdir = wdirhex.startswith(id)

1145

try:

1156

try:

1146

partial = self.index.partialmatch(id)

1157

partial = self.index.partialmatch(id)

1147

if partial and self.hasnode(partial):

1158

if partial and self.hasnode(partial):

1148

if maybewdir:

1159

if maybewdir:

1149

# single 'ff...' match in radix tree, ambiguous with wdir

1160

# single 'ff...' match in radix tree, ambiguous with wdir

1150

raise RevlogError

1161

raise RevlogError

1151

return partial

1162

return partial

1152

if maybewdir:

1163

if maybewdir:

1153

# no 'ff...' match in radix tree, wdir identified

1164

# no 'ff...' match in radix tree, wdir identified

1154

raise error.WdirUnsupported

1165

raise error.WdirUnsupported

1155

return None

1166

return None

1156

except RevlogError:

1167

except RevlogError:

1157

# parsers.c radix tree lookup gave multiple matches

1168

# parsers.c radix tree lookup gave multiple matches

1158

# fast path: for unfiltered changelog, radix tree is accurate

1169

# fast path: for unfiltered changelog, radix tree is accurate

1159

if not getattr(self, 'filteredrevs', None):

1170

if not getattr(self, 'filteredrevs', None):

1160

raise LookupError(id, self.indexfile,

1171

raise LookupError(id, self.indexfile,

1161

_('ambiguous identifier'))

1172

_('ambiguous identifier'))

1162

# fall through to slow path that filters hidden revisions

1173

# fall through to slow path that filters hidden revisions

1163

except (AttributeError, ValueError):

1174

except (AttributeError, ValueError):

1164

# we are pure python, or key was too short to search radix tree

1175

# we are pure python, or key was too short to search radix tree

1165

pass

1176

pass

1166

1177

1167

if id in self._pcache:

1178

if id in self._pcache:

1168

return self._pcache[id]

1179

return self._pcache[id]

1169

1180

1170

if len(id) < 40:

1181

if len(id) < 40:

1171

try:

1182

try:

1172

# hex(node)[:...]

1183

# hex(node)[:...]

1173

l = len(id) // 2 # grab an even number of digits

1184

l = len(id) // 2 # grab an even number of digits

1174

prefix = bin(id[:l * 2])

1185

prefix = bin(id[:l * 2])

1175

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1186

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1176

nl = [n for n in nl if hex(n).startswith(id) and

1187

nl = [n for n in nl if hex(n).startswith(id) and

1177

self.hasnode(n)]

1188

self.hasnode(n)]

1178

if len(nl) > 0:

1189

if len(nl) > 0:

1179

if len(nl) == 1 and not maybewdir:

1190

if len(nl) == 1 and not maybewdir:

1180

self._pcache[id] = nl[0]

1191

self._pcache[id] = nl[0]

1181

return nl[0]

1192

return nl[0]

1182

raise LookupError(id, self.indexfile,

1193

raise LookupError(id, self.indexfile,

1183

_('ambiguous identifier'))

1194

_('ambiguous identifier'))

1184

if maybewdir:

1195

if maybewdir:

1185

raise error.WdirUnsupported

1196

raise error.WdirUnsupported

1186

return None

1197

return None

1187

except (TypeError, binascii.Error):

1198

except (TypeError, binascii.Error):

1188

pass

1199

pass

1189

1200

1190

def lookup(self, id):

1201

def lookup(self, id):

1191

"""locate a node based on:

1202

"""locate a node based on:

1192

- revision number or str(revision number)

1203

- revision number or str(revision number)

1193

- nodeid or subset of hex nodeid

1204

- nodeid or subset of hex nodeid

1194

"""

1205

"""

1195

n = self._match(id)

1206

n = self._match(id)

1196

if n is not None:

1207

if n is not None:

1197

return n

1208

return n

1198

n = self._partialmatch(id)

1209

n = self._partialmatch(id)

1199

if n:

1210

if n:

1200

return n

1211

return n

1201

1212

1202

raise LookupError(id, self.indexfile, _('no match found'))

1213

raise LookupError(id, self.indexfile, _('no match found'))

1203

1214

1204

def shortest(self, hexnode, minlength=1):

1215

def shortest(self, hexnode, minlength=1):

1205

"""Find the shortest unambiguous prefix that matches hexnode."""

1216

"""Find the shortest unambiguous prefix that matches hexnode."""

1206

def isvalid(test):

1217

def isvalid(test):

1207

try:

1218

try:

1208

if self._partialmatch(test) is None:

1219

if self._partialmatch(test) is None:

1209

return False

1220

return False

1210

1221

1211

try:

1222

try:

1212

i = int(test)

1223

i = int(test)

1213

# if we are a pure int, then starting with zero will not be

1224

# if we are a pure int, then starting with zero will not be

1214

# confused as a rev; or, obviously, if the int is larger

1225

# confused as a rev; or, obviously, if the int is larger

1215

# than the value of the tip rev

1226

# than the value of the tip rev

1216

if test[0] == '0' or i > len(self):

1227

if test[0] == '0' or i > len(self):

1217

return True

1228

return True

1218

return False

1229

return False

1219

except ValueError:

1230

except ValueError:

1220

return True

1231

return True

1221

except error.RevlogError:

1232

except error.RevlogError:

1222

return False

1233

return False

1223

except error.WdirUnsupported:

1234

except error.WdirUnsupported:

1224

# single 'ff...' match

1235

# single 'ff...' match

1225

return True

1236

return True

1226

1237

1227

shortest = hexnode

1238

shortest = hexnode

1228

startlength = max(6, minlength)

1239

startlength = max(6, minlength)

1229

length = startlength

1240

length = startlength

1230

while True:

1241

while True:

1231

test = hexnode[:length]

1242

test = hexnode[:length]

1232

if isvalid(test):

1243

if isvalid(test):

1233

shortest = test

1244

shortest = test

1234

if length == minlength or length > startlength:

1245

if length == minlength or length > startlength:

1235

return shortest

1246

return shortest

1236

length -= 1

1247

length -= 1

1237

else:

1248

else:

1238

length += 1

1249

length += 1

1239

if len(shortest) <= length:

1250

if len(shortest) <= length:

1240

return shortest

1251

return shortest

1241

1252

1242

def cmp(self, node, text):

1253

def cmp(self, node, text):

1243

"""compare text with a given file revision

1254

"""compare text with a given file revision

1244

1255

1245

returns True if text is different than what is stored.

1256

returns True if text is different than what is stored.

1246

"""

1257

"""

1247

p1, p2 = self.parents(node)

1258

p1, p2 = self.parents(node)

1248

return hash(text, p1, p2) != node

1259

return hash(text, p1, p2) != node

1249

1260

1250

def _cachesegment(self, offset, data):

1261

def _cachesegment(self, offset, data):

1251

"""Add a segment to the revlog cache.

1262

"""Add a segment to the revlog cache.

1252

1263

1253

Accepts an absolute offset and the data that is at that location.

1264

Accepts an absolute offset and the data that is at that location.

1254

"""

1265

"""

1255

o, d = self._chunkcache

1266

o, d = self._chunkcache

1256

# try to add to existing cache

1267

# try to add to existing cache

1257

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1268

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1258

self._chunkcache = o, d + data

1269

self._chunkcache = o, d + data

1259

else:

1270

else:

1260

self._chunkcache = offset, data

1271

self._chunkcache = offset, data

1261

1272

1262

def _readsegment(self, offset, length, df=None):

1273

def _readsegment(self, offset, length, df=None):

1263

"""Load a segment of raw data from the revlog.

1274

"""Load a segment of raw data from the revlog.

1264

1275

1265

Accepts an absolute offset, length to read, and an optional existing

1276

Accepts an absolute offset, length to read, and an optional existing

1266

file handle to read from.

1277

file handle to read from.

1267

1278

1268

If an existing file handle is passed, it will be seeked and the

1279

If an existing file handle is passed, it will be seeked and the

1269

original seek position will NOT be restored.

1280

original seek position will NOT be restored.

1270

1281

1271

Returns a str or buffer of raw byte data.

1282

Returns a str or buffer of raw byte data.

1272

"""

1283

"""

1273

if df is not None:

1284

if df is not None:

1274

closehandle = False

1285

closehandle = False

1275

else:

1286

else:

1276

if self._inline:

1287

if self._inline:

1277

df = self.opener(self.indexfile)

1288

df = self.opener(self.indexfile)

1278

else:

1289

else:

1279

df = self.opener(self.datafile)

1290

df = self.opener(self.datafile)

1280

closehandle = True

1291

closehandle = True

1281

1292

1282

# Cache data both forward and backward around the requested

1293

# Cache data both forward and backward around the requested

1283

# data, in a fixed size window. This helps speed up operations

1294

# data, in a fixed size window. This helps speed up operations

1284

# involving reading the revlog backwards.

1295

# involving reading the revlog backwards.

1285

cachesize = self._chunkcachesize

1296

cachesize = self._chunkcachesize

1286

realoffset = offset & ~(cachesize - 1)

1297

realoffset = offset & ~(cachesize - 1)

1287

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1298

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1288

- realoffset)

1299

- realoffset)

1289

df.seek(realoffset)

1300

df.seek(realoffset)

1290

d = df.read(reallength)

1301

d = df.read(reallength)

1291

if closehandle:

1302

if closehandle:

1292

df.close()

1303

df.close()

1293

self._cachesegment(realoffset, d)

1304

self._cachesegment(realoffset, d)

1294

if offset != realoffset or reallength != length:

1305

if offset != realoffset or reallength != length:

1295

return util.buffer(d, offset - realoffset, length)

1306

return util.buffer(d, offset - realoffset, length)

1296

return d

1307

return d

1297

1308

1298

def _getsegment(self, offset, length, df=None):

1309

def _getsegment(self, offset, length, df=None):

1299

"""Obtain a segment of raw data from the revlog.

1310

"""Obtain a segment of raw data from the revlog.

1300

1311

1301

Accepts an absolute offset, length of bytes to obtain, and an

1312

Accepts an absolute offset, length of bytes to obtain, and an

1302

optional file handle to the already-opened revlog. If the file

1313

optional file handle to the already-opened revlog. If the file

1303

handle is used, it's original seek position will not be preserved.

1314

handle is used, it's original seek position will not be preserved.

1304

1315

1305

Requests for data may be returned from a cache.

1316

Requests for data may be returned from a cache.

1306

1317

1307

Returns a str or a buffer instance of raw byte data.

1318

Returns a str or a buffer instance of raw byte data.

1308

"""

1319

"""

1309

o, d = self._chunkcache

1320

o, d = self._chunkcache

1310

l = len(d)

1321

l = len(d)

1311

1322

1312

# is it in the cache?

1323

# is it in the cache?

1313

cachestart = offset - o

1324

cachestart = offset - o

1314

cacheend = cachestart + length

1325

cacheend = cachestart + length

1315

if cachestart >= 0 and cacheend <= l:

1326

if cachestart >= 0 and cacheend <= l:

1316

if cachestart == 0 and cacheend == l:

1327

if cachestart == 0 and cacheend == l:

1317

return d # avoid a copy

1328

return d # avoid a copy

1318

return util.buffer(d, cachestart, cacheend - cachestart)

1329

return util.buffer(d, cachestart, cacheend - cachestart)

1319

1330

1320

return self._readsegment(offset, length, df=df)

1331

return self._readsegment(offset, length, df=df)

1321

1332

1322

def _getsegmentforrevs(self, startrev, endrev, df=None):

1333

def _getsegmentforrevs(self, startrev, endrev, df=None):

1323

"""Obtain a segment of raw data corresponding to a range of revisions.

1334

"""Obtain a segment of raw data corresponding to a range of revisions.

1324

1335

1325

Accepts the start and end revisions and an optional already-open

1336

Accepts the start and end revisions and an optional already-open

1326

file handle to be used for reading. If the file handle is read, its

1337

file handle to be used for reading. If the file handle is read, its

1327

seek position will not be preserved.

1338

seek position will not be preserved.

1328

1339

1329

Requests for data may be satisfied by a cache.

1340

Requests for data may be satisfied by a cache.

1330

1341

1331

Returns a 2-tuple of (offset, data) for the requested range of

1342

Returns a 2-tuple of (offset, data) for the requested range of

1332

revisions. Offset is the integer offset from the beginning of the

1343

revisions. Offset is the integer offset from the beginning of the

1333

revlog and data is a str or buffer of the raw byte data.

1344

revlog and data is a str or buffer of the raw byte data.

1334

1345

1335

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1346

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1336

to determine where each revision's data begins and ends.

1347

to determine where each revision's data begins and ends.

1337

"""

1348

"""

1338

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1349

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1339

# (functions are expensive).

1350

# (functions are expensive).

1340

index = self.index

1351

index = self.index

1341

istart = index[startrev]

1352

istart = index[startrev]

1342

start = int(istart[0] >> 16)

1353

start = int(istart[0] >> 16)

1343

if startrev == endrev:

1354

if startrev == endrev:

1344

end = start + istart[1]

1355

end = start + istart[1]

1345

else:

1356

else:

1346

iend = index[endrev]

1357

iend = index[endrev]

1347

end = int(iend[0] >> 16) + iend[1]

1358

end = int(iend[0] >> 16) + iend[1]

1348

1359

1349

if self._inline:

1360

if self._inline:

1350

start += (startrev + 1) * self._io.size

1361

start += (startrev + 1) * self._io.size

1351

end += (endrev + 1) * self._io.size

1362

end += (endrev + 1) * self._io.size

1352

length = end - start

1363

length = end - start

1353

1364

1354

return start, self._getsegment(start, length, df=df)

1365

return start, self._getsegment(start, length, df=df)

1355

1366

1356

def _chunk(self, rev, df=None):

1367

def _chunk(self, rev, df=None):

1357

"""Obtain a single decompressed chunk for a revision.

1368

"""Obtain a single decompressed chunk for a revision.

1358

1369

1359

Accepts an integer revision and an optional already-open file handle

1370

Accepts an integer revision and an optional already-open file handle

1360

to be used for reading. If used, the seek position of the file will not

1371

to be used for reading. If used, the seek position of the file will not

1361

be preserved.

1372

be preserved.

1362

1373

1363

Returns a str holding uncompressed data for the requested revision.

1374

Returns a str holding uncompressed data for the requested revision.

1364

"""

1375

"""

1365

return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

1376

return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

1366

1377

1367

def _chunks(self, revs, df=None):

1378

def _chunks(self, revs, df=None):

1368

"""Obtain decompressed chunks for the specified revisions.

1379

"""Obtain decompressed chunks for the specified revisions.

1369

1380

1370

Accepts an iterable of numeric revisions that are assumed to be in

1381

Accepts an iterable of numeric revisions that are assumed to be in

1371

ascending order. Also accepts an optional already-open file handle

1382

ascending order. Also accepts an optional already-open file handle

1372

to be used for reading. If used, the seek position of the file will

1383

to be used for reading. If used, the seek position of the file will

1373

not be preserved.

1384

not be preserved.

1374

1385

1375

This function is similar to calling ``self._chunk()`` multiple times,

1386

This function is similar to calling ``self._chunk()`` multiple times,

1376

but is faster.

1387

but is faster.

1377

1388

1378

Returns a list with decompressed data for each requested revision.

1389

Returns a list with decompressed data for each requested revision.

1379

"""

1390

"""

1380

if not revs:

1391

if not revs:

1381

return []

1392

return []

1382

start = self.start

1393

start = self.start

1383

length = self.length

1394

length = self.length

1384

inline = self._inline

1395

inline = self._inline

1385

iosize = self._io.size

1396

iosize = self._io.size

1386

buffer = util.buffer

1397

buffer = util.buffer

1387

1398

1388

l = []

1399

l = []

1389

ladd = l.append

1400

ladd = l.append

1390

1401

1391

if not self._withsparseread:

1402

if not self._withsparseread:

1392

slicedchunks = (revs,)

1403

slicedchunks = (revs,)

1393

else:

1404

else:

1394

slicedchunks = _slicechunk(self, revs)

1405

slicedchunks = _slicechunk(self, revs)

1395

1406

1396

for revschunk in slicedchunks:

1407

for revschunk in slicedchunks:

1397

firstrev = revschunk[0]

1408

firstrev = revschunk[0]

1398

# Skip trailing revisions with empty diff

1409

# Skip trailing revisions with empty diff

1399

for lastrev in revschunk[::-1]:

1410

for lastrev in revschunk[::-1]:

1400

if length(lastrev) != 0:

1411

if length(lastrev) != 0:

1401

break

1412

break

1402

1413

1403

try:

1414

try:

1404

offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)

1415

offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)

1405

except OverflowError:

1416

except OverflowError:

1406

# issue4215 - we can't cache a run of chunks greater than

1417

# issue4215 - we can't cache a run of chunks greater than

1407

# 2G on Windows

1418

# 2G on Windows

1408

return [self._chunk(rev, df=df) for rev in revschunk]

1419

return [self._chunk(rev, df=df) for rev in revschunk]

1409

1420

1410

decomp = self.decompress

1421

decomp = self.decompress

1411

for rev in revschunk:

1422

for rev in revschunk:

1412

chunkstart = start(rev)

1423

chunkstart = start(rev)

1413

if inline:

1424

if inline:

1414

chunkstart += (rev + 1) * iosize

1425

chunkstart += (rev + 1) * iosize

1415

chunklength = length(rev)

1426

chunklength = length(rev)

1416

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1427

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1417

1428

1418

return l

1429

return l

1419

1430

1420

def _chunkclear(self):

1431

def _chunkclear(self):

1421

"""Clear the raw chunk cache."""

1432

"""Clear the raw chunk cache."""

1422

self._chunkcache = (0, '')

1433

self._chunkcache = (0, '')

1423

1434

1424

def deltaparent(self, rev):

1435

def deltaparent(self, rev):

1425

"""return deltaparent of the given revision"""

1436

"""return deltaparent of the given revision"""

1426

base = self.index[rev][3]

1437

base = self.index[rev][3]

1427

if base == rev:

1438

if base == rev:

1428

return nullrev

1439

return nullrev

1429

elif self._generaldelta:

1440

elif self._generaldelta:

1430

return base

1441

return base

1431

else:

1442

else:

1432

return rev - 1

1443

return rev - 1

1433

1444

1434

def revdiff(self, rev1, rev2):

1445

def revdiff(self, rev1, rev2):

1435

"""return or calculate a delta between two revisions

1446

"""return or calculate a delta between two revisions

1436

1447

1437

The delta calculated is in binary form and is intended to be written to

1448

The delta calculated is in binary form and is intended to be written to

1438

revlog data directly. So this function needs raw revision data.

1449

revlog data directly. So this function needs raw revision data.

1439

"""

1450

"""

1440

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1451

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1441

return bytes(self._chunk(rev2))

1452

return bytes(self._chunk(rev2))

1442

1453

1443

return mdiff.textdiff(self.revision(rev1, raw=True),

1454

return mdiff.textdiff(self.revision(rev1, raw=True),

1444

self.revision(rev2, raw=True))

1455

self.revision(rev2, raw=True))

1445

1456

1446

def revision(self, nodeorrev, _df=None, raw=False):

1457

def revision(self, nodeorrev, _df=None, raw=False):

1447

"""return an uncompressed revision of a given node or revision

1458

"""return an uncompressed revision of a given node or revision

1448

number.

1459

number.

1449

1460

1450

_df - an existing file handle to read from. (internal-only)

1461

_df - an existing file handle to read from. (internal-only)

1451

raw - an optional argument specifying if the revision data is to be

1462

raw - an optional argument specifying if the revision data is to be

1452

treated as raw data when applying flag transforms. 'raw' should be set

1463

treated as raw data when applying flag transforms. 'raw' should be set

1453

to True when generating changegroups or in debug commands.

1464

to True when generating changegroups or in debug commands.

1454

"""

1465

"""

1455

if isinstance(nodeorrev, int):

1466

if isinstance(nodeorrev, int):

1456

rev = nodeorrev

1467

rev = nodeorrev

1457

node = self.node(rev)

1468

node = self.node(rev)

1458

else:

1469

else:

1459

node = nodeorrev

1470

node = nodeorrev

1460

rev = None

1471

rev = None

1461

1472

1462

cachedrev = None

1473

cachedrev = None

1463

flags = None

1474

flags = None

1464

rawtext = None

1475

rawtext = None

1465

if node == nullid:

1476

if node == nullid:

1466

return ""

1477

return ""

1467

if self._cache:

1478

if self._cache:

1468

if self._cache[0] == node:

1479

if self._cache[0] == node:

1469

# _cache only stores rawtext

1480

# _cache only stores rawtext

1470

if raw:

1481

if raw:

1471

return self._cache[2]

1482

return self._cache[2]

1472

# duplicated, but good for perf

1483

# duplicated, but good for perf

1473

if rev is None:

1484

if rev is None:

1474

rev = self.rev(node)

1485

rev = self.rev(node)

1475

if flags is None:

1486

if flags is None:

1476

flags = self.flags(rev)

1487

flags = self.flags(rev)

1477

# no extra flags set, no flag processor runs, text = rawtext

1488

# no extra flags set, no flag processor runs, text = rawtext

1478

if flags == REVIDX_DEFAULT_FLAGS:

1489

if flags == REVIDX_DEFAULT_FLAGS:

1479

return self._cache[2]

1490

return self._cache[2]

1480

# rawtext is reusable. need to run flag processor

1491

# rawtext is reusable. need to run flag processor

1481

rawtext = self._cache[2]

1492

rawtext = self._cache[2]

1482

1493

1483

cachedrev = self._cache[1]

1494

cachedrev = self._cache[1]

1484

1495

1485

# look up what we need to read

1496

# look up what we need to read

1486

if rawtext is None:

1497

if rawtext is None:

1487

if rev is None:

1498

if rev is None:

1488

rev = self.rev(node)

1499

rev = self.rev(node)

1489

1500

1490

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1501

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1491

if stopped:

1502

if stopped:

1492

rawtext = self._cache[2]

1503

rawtext = self._cache[2]

1493

1504

1494

# drop cache to save memory

1505

# drop cache to save memory

1495

self._cache = None

1506

self._cache = None

1496

1507

1497

bins = self._chunks(chain, df=_df)

1508

bins = self._chunks(chain, df=_df)

1498

if rawtext is None:

1509

if rawtext is None:

1499

rawtext = bytes(bins[0])

1510

rawtext = bytes(bins[0])

1500

bins = bins[1:]

1511

bins = bins[1:]

1501

1512

1502

rawtext = mdiff.patches(rawtext, bins)

1513

rawtext = mdiff.patches(rawtext, bins)

1503

self._cache = (node, rev, rawtext)

1514

self._cache = (node, rev, rawtext)

1504

1515

1505

if flags is None:

1516

if flags is None:

1506

if rev is None:

1517

if rev is None:

1507

rev = self.rev(node)

1518

rev = self.rev(node)

1508

flags = self.flags(rev)

1519

flags = self.flags(rev)

1509

1520

1510

text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)

1521

text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)

1511

if validatehash:

1522

if validatehash:

1512

self.checkhash(text, node, rev=rev)

1523

self.checkhash(text, node, rev=rev)

1513

1524

1514

return text

1525

return text

1515

1526

1516

def hash(self, text, p1, p2):

1527

def hash(self, text, p1, p2):

1517

"""Compute a node hash.

1528

"""Compute a node hash.

1518

1529

1519

Available as a function so that subclasses can replace the hash

1530

Available as a function so that subclasses can replace the hash

1520

as needed.

1531

as needed.

1521

"""

1532

"""

1522

return hash(text, p1, p2)

1533

return hash(text, p1, p2)

1523

1534

1524

def _processflags(self, text, flags, operation, raw=False):

1535

def _processflags(self, text, flags, operation, raw=False):

1525

"""Inspect revision data flags and applies transforms defined by

1536

"""Inspect revision data flags and applies transforms defined by

1526

registered flag processors.

1537

registered flag processors.

1527

1538

1528

``text`` - the revision data to process

1539

``text`` - the revision data to process

1529

``flags`` - the revision flags

1540

``flags`` - the revision flags

1530

``operation`` - the operation being performed (read or write)

1541

``operation`` - the operation being performed (read or write)

1531

``raw`` - an optional argument describing if the raw transform should be

1542

``raw`` - an optional argument describing if the raw transform should be

1532

applied.

1543

applied.

1533

1544

1534

This method processes the flags in the order (or reverse order if

1545

This method processes the flags in the order (or reverse order if

1535

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1546

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1536

flag processors registered for present flags. The order of flags defined

1547

flag processors registered for present flags. The order of flags defined

1537

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1548

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1538

1549

1539

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1550

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1540

processed text and ``validatehash`` is a bool indicating whether the

1551

processed text and ``validatehash`` is a bool indicating whether the

1541

returned text should be checked for hash integrity.

1552

returned text should be checked for hash integrity.

1542

1553

1543

Note: If the ``raw`` argument is set, it has precedence over the

1554

Note: If the ``raw`` argument is set, it has precedence over the

1544

operation and will only update the value of ``validatehash``.

1555

operation and will only update the value of ``validatehash``.

1545

"""

1556

"""

1546

# fast path: no flag processors will run

1557

# fast path: no flag processors will run

1547

if flags == 0:

1558

if flags == 0:

1548

return text, True

1559

return text, True

1549

if not operation in ('read', 'write'):

1560

if not operation in ('read', 'write'):

1550

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1561

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1551

# Check all flags are known.

1562

# Check all flags are known.

1552

if flags & ~REVIDX_KNOWN_FLAGS:

1563

if flags & ~REVIDX_KNOWN_FLAGS:

1553

raise RevlogError(_("incompatible revision flag '%#x'") %

1564

raise RevlogError(_("incompatible revision flag '%#x'") %

1554

(flags & ~REVIDX_KNOWN_FLAGS))

1565

(flags & ~REVIDX_KNOWN_FLAGS))

1555

validatehash = True

1566

validatehash = True

1556

# Depending on the operation (read or write), the order might be

1567

# Depending on the operation (read or write), the order might be

1557

# reversed due to non-commutative transforms.

1568

# reversed due to non-commutative transforms.

1558

orderedflags = REVIDX_FLAGS_ORDER

1569

orderedflags = REVIDX_FLAGS_ORDER

1559

if operation == 'write':

1570

if operation == 'write':

1560

orderedflags = reversed(orderedflags)

1571

orderedflags = reversed(orderedflags)

1561

1572

1562

for flag in orderedflags:

1573

for flag in orderedflags:

1563

# If a flagprocessor has been registered for a known flag, apply the

1574

# If a flagprocessor has been registered for a known flag, apply the

1564

# related operation transform and update result tuple.

1575

# related operation transform and update result tuple.

1565

if flag & flags:

1576

if flag & flags:

1566

vhash = True

1577

vhash = True

1567

1578

1568

if flag not in _flagprocessors:

1579

if flag not in _flagprocessors:

1569

message = _("missing processor for flag '%#x'") % (flag)

1580

message = _("missing processor for flag '%#x'") % (flag)

1570

raise RevlogError(message)

1581

raise RevlogError(message)

1571

1582

1572

processor = _flagprocessors[flag]

1583

processor = _flagprocessors[flag]

1573

if processor is not None:

1584

if processor is not None:

1574

readtransform, writetransform, rawtransform = processor

1585

readtransform, writetransform, rawtransform = processor

1575

1586

1576

if raw:

1587

if raw:

1577

vhash = rawtransform(self, text)

1588

vhash = rawtransform(self, text)

1578

elif operation == 'read':

1589

elif operation == 'read':

1579

text, vhash = readtransform(self, text)

1590

text, vhash = readtransform(self, text)

1580

else: # write operation

1591

else: # write operation

1581

text, vhash = writetransform(self, text)

1592

text, vhash = writetransform(self, text)

1582

validatehash = validatehash and vhash

1593

validatehash = validatehash and vhash

1583

1594

1584

return text, validatehash

1595

return text, validatehash

1585

1596

1586

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1597

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1587

"""Check node hash integrity.

1598

"""Check node hash integrity.

1588

1599

1589

Available as a function so that subclasses can extend hash mismatch

1600

Available as a function so that subclasses can extend hash mismatch

1590

behaviors as needed.

1601

behaviors as needed.

1591

"""

1602

"""

1592

if p1 is None and p2 is None:

1603

if p1 is None and p2 is None:

1593

p1, p2 = self.parents(node)

1604

p1, p2 = self.parents(node)

1594

if node != self.hash(text, p1, p2):

1605

if node != self.hash(text, p1, p2):

1595

revornode = rev

1606

revornode = rev

1596

if revornode is None:

1607

if revornode is None:

1597

revornode = templatefilters.short(hex(node))

1608

revornode = templatefilters.short(hex(node))

1598

raise RevlogError(_("integrity check failed on %s:%s")

1609

raise RevlogError(_("integrity check failed on %s:%s")

1599

% (self.indexfile, pycompat.bytestr(revornode)))

1610

% (self.indexfile, pycompat.bytestr(revornode)))

1600

1611

1601

def checkinlinesize(self, tr, fp=None):

1612

def checkinlinesize(self, tr, fp=None):

1602

"""Check if the revlog is too big for inline and convert if so.

1613

"""Check if the revlog is too big for inline and convert if so.

1603

1614

1604

This should be called after revisions are added to the revlog. If the

1615

This should be called after revisions are added to the revlog. If the

1605

revlog has grown too large to be an inline revlog, it will convert it

1616

revlog has grown too large to be an inline revlog, it will convert it

1606

to use multiple index and data files.

1617

to use multiple index and data files.

1607

"""

1618

"""

1608

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1619

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1609

return

1620

return

1610

1621

1611

trinfo = tr.find(self.indexfile)

1622

trinfo = tr.find(self.indexfile)

1612

if trinfo is None:

1623

if trinfo is None:

1613

raise RevlogError(_("%s not found in the transaction")

1624

raise RevlogError(_("%s not found in the transaction")

1614

% self.indexfile)

1625

% self.indexfile)

1615

1626

1616

trindex = trinfo[2]

1627

trindex = trinfo[2]

1617

if trindex is not None:

1628

if trindex is not None:

1618

dataoff = self.start(trindex)

1629

dataoff = self.start(trindex)

1619

else:

1630

else:

1620

# revlog was stripped at start of transaction, use all leftover data

1631

# revlog was stripped at start of transaction, use all leftover data

1621

trindex = len(self) - 1

1632

trindex = len(self) - 1

1622

dataoff = self.end(-2)

1633

dataoff = self.end(-2)

1623

1634

1624

tr.add(self.datafile, dataoff)

1635

tr.add(self.datafile, dataoff)

1625

1636

1626

if fp:

1637

if fp:

1627

fp.flush()

1638

fp.flush()

1628

fp.close()

1639

fp.close()

1629

1640

1630

df = self.opener(self.datafile, 'w')

1641

df = self.opener(self.datafile, 'w')

1631

try:

1642

try:

1632

for r in self:

1643

for r in self:

1633

df.write(self._getsegmentforrevs(r, r)[1])

1644

df.write(self._getsegmentforrevs(r, r)[1])

1634

finally:

1645

finally:

1635

df.close()

1646

df.close()

1636

1647

1637

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1648

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1638

checkambig=self._checkambig)

1649

checkambig=self._checkambig)

1639

self.version &= ~FLAG_INLINE_DATA

1650

self.version &= ~FLAG_INLINE_DATA

1640

self._inline = False

1651

self._inline = False

1641

for i in self:

1652

for i in self:

1642

e = self._io.packentry(self.index[i], self.node, self.version, i)

1653

e = self._io.packentry(self.index[i], self.node, self.version, i)

1643

fp.write(e)

1654

fp.write(e)

1644

1655

1645

# if we don't call close, the temp file will never replace the

1656

# if we don't call close, the temp file will never replace the

1646

# real index

1657

# real index

1647

fp.close()

1658

fp.close()

1648

1659

1649

tr.replace(self.indexfile, trindex * self._io.size)

1660

tr.replace(self.indexfile, trindex * self._io.size)

1650

self._chunkclear()

1661

self._chunkclear()

1651

1662

1652

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1663

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1653

node=None, flags=REVIDX_DEFAULT_FLAGS):

1664

node=None, flags=REVIDX_DEFAULT_FLAGS):

1654

"""add a revision to the log

1665

"""add a revision to the log

1655

1666

1656

text - the revision data to add

1667

text - the revision data to add

1657

transaction - the transaction object used for rollback

1668

transaction - the transaction object used for rollback

1658

link - the linkrev data to add

1669

link - the linkrev data to add

1659

p1, p2 - the parent nodeids of the revision

1670

p1, p2 - the parent nodeids of the revision

1660

cachedelta - an optional precomputed delta

1671

cachedelta - an optional precomputed delta

1661

node - nodeid of revision; typically node is not specified, and it is

1672

node - nodeid of revision; typically node is not specified, and it is

1662

computed by default as hash(text, p1, p2), however subclasses might

1673

computed by default as hash(text, p1, p2), however subclasses might

1663

use different hashing method (and override checkhash() in such case)

1674

use different hashing method (and override checkhash() in such case)

1664

flags - the known flags to set on the revision

1675

flags - the known flags to set on the revision

1665

"""

1676

"""

1666

if link == nullrev:

1677

if link == nullrev:

1667

raise RevlogError(_("attempted to add linkrev -1 to %s")

1678

raise RevlogError(_("attempted to add linkrev -1 to %s")

1668

% self.indexfile)

1679

% self.indexfile)

1669

1680

1670

if flags:

1681

if flags:

1671

node = node or self.hash(text, p1, p2)

1682

node = node or self.hash(text, p1, p2)

1672

1683

1673

rawtext, validatehash = self._processflags(text, flags, 'write')

1684

rawtext, validatehash = self._processflags(text, flags, 'write')

1674

1685

1675

# If the flag processor modifies the revision data, ignore any provided

1686

# If the flag processor modifies the revision data, ignore any provided

1676

# cachedelta.

1687

# cachedelta.

1677

if rawtext != text:

1688

if rawtext != text:

1678

cachedelta = None

1689

cachedelta = None

1679

1690

1680

if len(rawtext) > _maxentrysize:

1691

if len(rawtext) > _maxentrysize:

1681

raise RevlogError(

1692

raise RevlogError(

1682

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1693

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1683

% (self.indexfile, len(rawtext)))

1694

% (self.indexfile, len(rawtext)))

1684

1695

1685

node = node or self.hash(rawtext, p1, p2)

1696

node = node or self.hash(rawtext, p1, p2)

1686

if node in self.nodemap:

1697

if node in self.nodemap:

1687

return node

1698

return node

1688

1699

1689

if validatehash:

1700

if validatehash:

1690

self.checkhash(rawtext, node, p1=p1, p2=p2)

1701

self.checkhash(rawtext, node, p1=p1, p2=p2)

1691

1702

1692

return self.addrawrevision(rawtext, transaction, link, p1, p2, node,

1703

return self.addrawrevision(rawtext, transaction, link, p1, p2, node,

1693

flags, cachedelta=cachedelta)

1704

flags, cachedelta=cachedelta)

1694

1705

1695

def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,

1706

def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,

1696

cachedelta=None):

1707

cachedelta=None):

1697

"""add a raw revision with known flags, node and parents

1708

"""add a raw revision with known flags, node and parents

1698

useful when reusing a revision not stored in this revlog (ex: received

1709

useful when reusing a revision not stored in this revlog (ex: received

1699

over wire, or read from an external bundle).

1710

over wire, or read from an external bundle).

1700

"""

1711

"""

1701

dfh = None

1712

dfh = None

1702

if not self._inline:

1713

if not self._inline:

1703

dfh = self.opener(self.datafile, "a+")

1714

dfh = self.opener(self.datafile, "a+")

1704

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1715

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1705

try:

1716

try:

1706

return self._addrevision(node, rawtext, transaction, link, p1, p2,

1717

return self._addrevision(node, rawtext, transaction, link, p1, p2,

1707

flags, cachedelta, ifh, dfh)

1718

flags, cachedelta, ifh, dfh)

1708

finally:

1719

finally:

1709

if dfh:

1720

if dfh:

1710

dfh.close()

1721

dfh.close()

1711

ifh.close()

1722

ifh.close()

1712

1723

1713

def compress(self, data):

1724

def compress(self, data):

1714

"""Generate a possibly-compressed representation of data."""

1725

"""Generate a possibly-compressed representation of data."""

1715

if not data:

1726

if not data:

1716

return '', data

1727

return '', data

1717

1728

1718

compressed = self._compressor.compress(data)

1729

compressed = self._compressor.compress(data)

1719

1730

1720

if compressed:

1731

if compressed:

1721

# The revlog compressor added the header in the returned data.

1732

# The revlog compressor added the header in the returned data.

1722

return '', compressed

1733

return '', compressed

1723

1734

1724

if data[0:1] == '\0':

1735

if data[0:1] == '\0':

1725

return '', data

1736

return '', data

1726

return 'u', data

1737

return 'u', data

1727

1738

1728

def decompress(self, data):

1739

def decompress(self, data):

1729

"""Decompress a revlog chunk.

1740

"""Decompress a revlog chunk.

1730

1741

1731

The chunk is expected to begin with a header identifying the

1742

The chunk is expected to begin with a header identifying the

1732

format type so it can be routed to an appropriate decompressor.

1743

format type so it can be routed to an appropriate decompressor.

1733

"""

1744

"""

1734

if not data:

1745

if not data:

1735

return data

1746

return data

1736

1747

1737

# Revlogs are read much more frequently than they are written and many

1748

# Revlogs are read much more frequently than they are written and many

1738

# chunks only take microseconds to decompress, so performance is

1749

# chunks only take microseconds to decompress, so performance is

1739

# important here.

1750

# important here.

1740

#

1751

#

1741

# We can make a few assumptions about revlogs:

1752

# We can make a few assumptions about revlogs:

1742

#

1753

#

1743

# 1) the majority of chunks will be compressed (as opposed to inline

1754

# 1) the majority of chunks will be compressed (as opposed to inline

1744

# raw data).

1755

# raw data).

1745

# 2) decompressing *any* data will likely by at least 10x slower than

1756

# 2) decompressing *any* data will likely by at least 10x slower than

1746

# returning raw inline data.

1757

# returning raw inline data.

1747

# 3) we want to prioritize common and officially supported compression

1758

# 3) we want to prioritize common and officially supported compression

1748

# engines

1759

# engines

1749

#

1760

#

1750

# It follows that we want to optimize for "decompress compressed data

1761

# It follows that we want to optimize for "decompress compressed data

1751

# when encoded with common and officially supported compression engines"

1762

# when encoded with common and officially supported compression engines"

1752

# case over "raw data" and "data encoded by less common or non-official

1763

# case over "raw data" and "data encoded by less common or non-official

1753

# compression engines." That is why we have the inline lookup first

1764

# compression engines." That is why we have the inline lookup first

1754

# followed by the compengines lookup.

1765

# followed by the compengines lookup.

1755

#

1766

#

1756

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1767

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1757

# compressed chunks. And this matters for changelog and manifest reads.

1768

# compressed chunks. And this matters for changelog and manifest reads.

1758

t = data[0:1]

1769

t = data[0:1]

1759

1770

1760

if t == 'x':

1771

if t == 'x':

1761

try:

1772

try:

1762

return _zlibdecompress(data)

1773

return _zlibdecompress(data)

1763

except zlib.error as e:

1774

except zlib.error as e:

1764

raise RevlogError(_('revlog decompress error: %s') % str(e))

1775

raise RevlogError(_('revlog decompress error: %s') % str(e))

1765

# '\0' is more common than 'u' so it goes first.

1776

# '\0' is more common than 'u' so it goes first.

1766

elif t == '\0':

1777

elif t == '\0':

1767

return data

1778

return data

1768

elif t == 'u':

1779

elif t == 'u':

1769

return util.buffer(data, 1)

1780

return util.buffer(data, 1)

1770

1781

1771

try:

1782

try:

1772

compressor = self._decompressors[t]

1783

compressor = self._decompressors[t]

1773

except KeyError:

1784

except KeyError:

1774

try:

1785

try:

1775

engine = util.compengines.forrevlogheader(t)

1786

engine = util.compengines.forrevlogheader(t)

1776

compressor = engine.revlogcompressor()

1787

compressor = engine.revlogcompressor()

1777

self._decompressors[t] = compressor

1788

self._decompressors[t] = compressor

1778

except KeyError:

1789

except KeyError:

1779

raise RevlogError(_('unknown compression type %r') % t)

1790

raise RevlogError(_('unknown compression type %r') % t)

1780

1791

1781

return compressor.decompress(data)

1792

return compressor.decompress(data)

1782

1793

1783

def _isgooddelta(self, d, textlen):

1794

def _isgooddelta(self, d, textlen):

1784

"""Returns True if the given delta is good. Good means that it is within

1795

"""Returns True if the given delta is good. Good means that it is within

1785

the disk span, disk size, and chain length bounds that we know to be

1796

the disk span, disk size, and chain length bounds that we know to be

1786

performant."""

1797

performant."""

1787

if d is None:

1798

if d is None:

1788

return False

1799

return False

1789

1800

1790

# - 'dist' is the distance from the base revision -- bounding it limits

1801

# - 'dist' is the distance from the base revision -- bounding it limits

1791

# the amount of I/O we need to do.

1802

# the amount of I/O we need to do.

1792

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1803

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1793

# to apply -- bounding it limits the amount of CPU we consume.

1804

# to apply -- bounding it limits the amount of CPU we consume.

1794

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1805

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1795

1806

1796

defaultmax = textlen * 4

1807

defaultmax = textlen * 4

1797

maxdist = self._maxdeltachainspan

1808

maxdist = self._maxdeltachainspan

1798

if not maxdist:

1809

if not maxdist:

1799

maxdist = dist # ensure the conditional pass

1810

maxdist = dist # ensure the conditional pass

1800

maxdist = max(maxdist, defaultmax)

1811

maxdist = max(maxdist, defaultmax)

1801

if (dist > maxdist or l > textlen or

1812

if (dist > maxdist or l > textlen or

1802

compresseddeltalen > textlen * 2 or

1813

compresseddeltalen > textlen * 2 or

1803

(self._maxchainlen and chainlen > self._maxchainlen)):

1814

(self._maxchainlen and chainlen > self._maxchainlen)):

1804

return False

1815

return False

1805

1816

1806

return True

1817

return True

1807

1818

1808

def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,

1819

def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,

1809

cachedelta, ifh, dfh, alwayscache=False):

1820

cachedelta, ifh, dfh, alwayscache=False):

1810

"""internal function to add revisions to the log

1821

"""internal function to add revisions to the log

1811

1822

1812

see addrevision for argument descriptions.

1823

see addrevision for argument descriptions.

1813

1824

1814

note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

1825

note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

1815

1826

1816

invariants:

1827

invariants:

1817

- rawtext is optional (can be None); if not set, cachedelta must be set.

1828

- rawtext is optional (can be None); if not set, cachedelta must be set.

1818

if both are set, they must correspond to each other.

1829

if both are set, they must correspond to each other.

1819

"""

1830

"""

1820

if node == nullid:

1831

if node == nullid:

1821

raise RevlogError(_("%s: attempt to add null revision") %

1832

raise RevlogError(_("%s: attempt to add null revision") %

1822

(self.indexfile))

1833

(self.indexfile))

1823

if node == wdirid:

1834

if node == wdirid:

1824

raise RevlogError(_("%s: attempt to add wdir revision") %

1835

raise RevlogError(_("%s: attempt to add wdir revision") %

1825

(self.indexfile))

1836

(self.indexfile))

1826

1837

1827

btext = [rawtext]

1838

btext = [rawtext]

1828

def buildtext():

1839

def buildtext():

1829

if btext[0] is not None:

1840

if btext[0] is not None:

1830

return btext[0]

1841

return btext[0]

1831

baserev = cachedelta[0]

1842

baserev = cachedelta[0]

1832

delta = cachedelta[1]

1843

delta = cachedelta[1]

1833

# special case deltas which replace entire base; no need to decode

1844

# special case deltas which replace entire base; no need to decode

1834

# base revision. this neatly avoids censored bases, which throw when

1845

# base revision. this neatly avoids censored bases, which throw when

1835

# they're decoded.

1846

# they're decoded.

1836

hlen = struct.calcsize(">lll")

1847

hlen = struct.calcsize(">lll")

1837

if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),

1848

if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),

1838

len(delta) - hlen):

1849

len(delta) - hlen):

1839

btext[0] = delta[hlen:]

1850

btext[0] = delta[hlen:]

1840

else:

1851

else:

1841

if self._inline:

1852

if self._inline:

1842

fh = ifh

1853

fh = ifh

1843

else:

1854

else:

1844

fh = dfh

1855

fh = dfh

1845

basetext = self.revision(baserev, _df=fh, raw=True)

1856

basetext = self.revision(baserev, _df=fh, raw=True)

1846

btext[0] = mdiff.patch(basetext, delta)

1857

btext[0] = mdiff.patch(basetext, delta)

1847

1858

1848

try:

1859

try:

1849

res = self._processflags(btext[0], flags, 'read', raw=True)

1860

res = self._processflags(btext[0], flags, 'read', raw=True)

1850

btext[0], validatehash = res

1861

btext[0], validatehash = res

1851

if validatehash:

1862

if validatehash:

1852

self.checkhash(btext[0], node, p1=p1, p2=p2)

1863

self.checkhash(btext[0], node, p1=p1, p2=p2)

1853

if flags & REVIDX_ISCENSORED:

1864

if flags & REVIDX_ISCENSORED:

1854

raise RevlogError(_('node %s is not censored') % node)

1865

raise RevlogError(_('node %s is not censored') % node)

1855

except CensoredNodeError:

1866

except CensoredNodeError:

1856

# must pass the censored index flag to add censored revisions

1867

# must pass the censored index flag to add censored revisions

1857

if not flags & REVIDX_ISCENSORED:

1868

if not flags & REVIDX_ISCENSORED:

1858

raise

1869

raise

1859

return btext[0]

1870

return btext[0]

1860

1871

1861

def builddelta(rev):

1872

def builddelta(rev):

1862

# can we use the cached delta?

1873

# can we use the cached delta?

1863

if cachedelta and cachedelta[0] == rev:

1874

if cachedelta and cachedelta[0] == rev:

1864

delta = cachedelta[1]

1875

delta = cachedelta[1]

1865

else:

1876

else:

1866

t = buildtext()

1877

t = buildtext()

1867

if self.iscensored(rev):

1878

if self.iscensored(rev):

1868

# deltas based on a censored revision must replace the

1879

# deltas based on a censored revision must replace the

1869

# full content in one patch, so delta works everywhere

1880

# full content in one patch, so delta works everywhere

1870

header = mdiff.replacediffheader(self.rawsize(rev), len(t))

1881

header = mdiff.replacediffheader(self.rawsize(rev), len(t))

1871

delta = header + t

1882

delta = header + t

1872

else:

1883

else:

1873

if self._inline:

1884

if self._inline:

1874

fh = ifh

1885

fh = ifh

1875

else:

1886

else:

1876

fh = dfh

1887

fh = dfh

1877

ptext = self.revision(rev, _df=fh, raw=True)

1888

ptext = self.revision(rev, _df=fh, raw=True)

1878

delta = mdiff.textdiff(ptext, t)

1889

delta = mdiff.textdiff(ptext, t)

1879

header, data = self.compress(delta)

1890

header, data = self.compress(delta)

1880

deltalen = len(header) + len(data)

1891

deltalen = len(header) + len(data)

1881

chainbase = self.chainbase(rev)

1892

chainbase = self.chainbase(rev)

1882

dist = deltalen + offset - self.start(chainbase)

1893

dist = deltalen + offset - self.start(chainbase)

1883

if self._generaldelta:

1894

if self._generaldelta:

1884

base = rev

1895

base = rev

1885

else:

1896

else:

1886

base = chainbase

1897

base = chainbase

1887

chainlen, compresseddeltalen = self._chaininfo(rev)

1898

chainlen, compresseddeltalen = self._chaininfo(rev)

1888

chainlen += 1

1899

chainlen += 1

1889

compresseddeltalen += deltalen

1900

compresseddeltalen += deltalen

1890

return (dist, deltalen, (header, data), base,

1901

return (dist, deltalen, (header, data), base,

1891

chainbase, chainlen, compresseddeltalen)

1902

chainbase, chainlen, compresseddeltalen)

1892

1903

1893

curr = len(self)

1904

curr = len(self)

1894

prev = curr - 1

1905

prev = curr - 1

1895

offset = self.end(prev)

1906

offset = self.end(prev)

1896

delta = None

1907

delta = None

1897

p1r, p2r = self.rev(p1), self.rev(p2)

1908

p1r, p2r = self.rev(p1), self.rev(p2)

1898

1909

1899

# full versions are inserted when the needed deltas

1910

# full versions are inserted when the needed deltas

1900

# become comparable to the uncompressed text

1911

# become comparable to the uncompressed text

1901

if rawtext is None:

1912

if rawtext is None:

1902

textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),

1913

textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),

1903

cachedelta[1])

1914

cachedelta[1])

1904

else:

1915

else:

1905

textlen = len(rawtext)

1916

textlen = len(rawtext)

1906

1917

1907

# should we try to build a delta?

1918

# should we try to build a delta?

1908

if prev != nullrev and self.storedeltachains:

1919

if prev != nullrev and self.storedeltachains:

1909

tested = set()

1920

tested = set()

1910

# This condition is true most of the time when processing

1921

# This condition is true most of the time when processing

1911

# changegroup data into a generaldelta repo. The only time it

1922

# changegroup data into a generaldelta repo. The only time it

1912

# isn't true is if this is the first revision in a delta chain

1923

# isn't true is if this is the first revision in a delta chain

1913

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

1924

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

1914

if cachedelta and self._generaldelta and self._lazydeltabase:

1925

if cachedelta and self._generaldelta and self._lazydeltabase:

1915

# Assume what we received from the server is a good choice

1926

# Assume what we received from the server is a good choice

1916

# build delta will reuse the cache

1927

# build delta will reuse the cache

1917

candidatedelta = builddelta(cachedelta[0])

1928

candidatedelta = builddelta(cachedelta[0])

1918

tested.add(cachedelta[0])

1929

tested.add(cachedelta[0])

1919

if self._isgooddelta(candidatedelta, textlen):

1930

if self._isgooddelta(candidatedelta, textlen):

1920

delta = candidatedelta

1931

delta = candidatedelta

1921

if delta is None and self._generaldelta:

1932

if delta is None and self._generaldelta:

1922

# exclude already lazy tested base if any

1933

# exclude already lazy tested base if any

1923

parents = [p for p in (p1r, p2r)

1934

parents = [p for p in (p1r, p2r)

1924

if p != nullrev and p not in tested]

1935

if p != nullrev and p not in tested]

1925

if parents and not self._aggressivemergedeltas:

1936

if parents and not self._aggressivemergedeltas:

1926

# Pick whichever parent is closer to us (to minimize the

1937

# Pick whichever parent is closer to us (to minimize the

1927

# chance of having to build a fulltext).

1938

# chance of having to build a fulltext).

1928

parents = [max(parents)]

1939

parents = [max(parents)]

1929

tested.update(parents)

1940

tested.update(parents)

1930

pdeltas = []

1941

pdeltas = []

1931

for p in parents:

1942

for p in parents:

1932

pd = builddelta(p)

1943

pd = builddelta(p)

1933

if self._isgooddelta(pd, textlen):

1944

if self._isgooddelta(pd, textlen):

1934

pdeltas.append(pd)

1945

pdeltas.append(pd)

1935

if pdeltas:

1946

if pdeltas:

1936

delta = min(pdeltas, key=lambda x: x[1])

1947

delta = min(pdeltas, key=lambda x: x[1])

1937

if delta is None and prev not in tested:

1948

if delta is None and prev not in tested:

1938

# other approach failed try against prev to hopefully save us a

1949

# other approach failed try against prev to hopefully save us a

1939

# fulltext.

1950

# fulltext.

1940

candidatedelta = builddelta(prev)

1951

candidatedelta = builddelta(prev)

1941

if self._isgooddelta(candidatedelta, textlen):

1952

if self._isgooddelta(candidatedelta, textlen):

1942

delta = candidatedelta

1953

delta = candidatedelta

1943

if delta is not None:

1954

if delta is not None:

1944

dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta

1955

dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta

1945

else:

1956

else:

1946

rawtext = buildtext()

1957

rawtext = buildtext()

1947

data = self.compress(rawtext)

1958

data = self.compress(rawtext)

1948

l = len(data[1]) + len(data[0])

1959

l = len(data[1]) + len(data[0])

1949

base = chainbase = curr

1960

base = chainbase = curr

1950

1961

1951

e = (offset_type(offset, flags), l, textlen,

1962

e = (offset_type(offset, flags), l, textlen,

1952

base, link, p1r, p2r, node)

1963

base, link, p1r, p2r, node)

1953

self.index.insert(-1, e)

1964

self.index.insert(-1, e)

1954

self.nodemap[node] = curr

1965

self.nodemap[node] = curr

1955

1966

1956

entry = self._io.packentry(e, self.node, self.version, curr)

1967

entry = self._io.packentry(e, self.node, self.version, curr)

1957

self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

1968

self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

1958

1969

1959

if alwayscache and rawtext is None:

1970

if alwayscache and rawtext is None:

1960

rawtext = buildtext()

1971

rawtext = buildtext()

1961

1972

1962

if type(rawtext) == str: # only accept immutable objects

1973

if type(rawtext) == str: # only accept immutable objects

1963

self._cache = (node, curr, rawtext)

1974

self._cache = (node, curr, rawtext)

1964

self._chainbasecache[curr] = chainbase

1975

self._chainbasecache[curr] = chainbase

1965

return node

1976

return node

1966

1977

1967

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1978

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1968

# Files opened in a+ mode have inconsistent behavior on various

1979

# Files opened in a+ mode have inconsistent behavior on various

1969

# platforms. Windows requires that a file positioning call be made

1980

# platforms. Windows requires that a file positioning call be made

1970

# when the file handle transitions between reads and writes. See

1981

# when the file handle transitions between reads and writes. See

1971

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1982

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1972

# platforms, Python or the platform itself can be buggy. Some versions

1983

# platforms, Python or the platform itself can be buggy. Some versions

1973

# of Solaris have been observed to not append at the end of the file

1984

# of Solaris have been observed to not append at the end of the file

1974

# if the file was seeked to before the end. See issue4943 for more.

1985

# if the file was seeked to before the end. See issue4943 for more.

1975

#

1986

#

1976

# We work around this issue by inserting a seek() before writing.

1987

# We work around this issue by inserting a seek() before writing.

1977

# Note: This is likely not necessary on Python 3.

1988

# Note: This is likely not necessary on Python 3.

1978

ifh.seek(0, os.SEEK_END)

1989

ifh.seek(0, os.SEEK_END)

1979

if dfh:

1990

if dfh:

1980

dfh.seek(0, os.SEEK_END)

1991

dfh.seek(0, os.SEEK_END)

1981

1992

1982

curr = len(self) - 1

1993

curr = len(self) - 1

1983

if not self._inline:

1994

if not self._inline:

1984

transaction.add(self.datafile, offset)

1995

transaction.add(self.datafile, offset)

1985

transaction.add(self.indexfile, curr * len(entry))

1996

transaction.add(self.indexfile, curr * len(entry))

1986

if data[0]:

1997

if data[0]:

1987

dfh.write(data[0])

1998

dfh.write(data[0])

1988

dfh.write(data[1])

1999

dfh.write(data[1])

1989

ifh.write(entry)

2000

ifh.write(entry)

1990

else:

2001

else:

1991

offset += curr * self._io.size

2002

offset += curr * self._io.size

1992

transaction.add(self.indexfile, offset, curr)

2003

transaction.add(self.indexfile, offset, curr)

1993

ifh.write(entry)

2004

ifh.write(entry)

1994

ifh.write(data[0])

2005

ifh.write(data[0])

1995

ifh.write(data[1])

2006

ifh.write(data[1])

1996

self.checkinlinesize(transaction, ifh)

2007

self.checkinlinesize(transaction, ifh)

1997

2008

1998

def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):

2009

def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):

1999

"""

2010

"""

2000

add a delta group

2011

add a delta group

2001

2012

2002

given a set of deltas, add them to the revision log. the

2013

given a set of deltas, add them to the revision log. the

2003

first delta is against its parent, which should be in our

2014

first delta is against its parent, which should be in our

2004

log, the rest are against the previous delta.

2015

log, the rest are against the previous delta.

2005

2016

2006

If ``addrevisioncb`` is defined, it will be called with arguments of

2017

If ``addrevisioncb`` is defined, it will be called with arguments of

2007

this revlog and the node that was added.

2018

this revlog and the node that was added.

2008

"""

2019

"""

2009

2020

2010

nodes = []

2021

nodes = []

2011

2022

2012

r = len(self)

2023

r = len(self)

2013

end = 0

2024

end = 0

2014

if r:

2025

if r:

2015

end = self.end(r - 1)

2026

end = self.end(r - 1)

2016

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

2027

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

2017

isize = r * self._io.size

2028

isize = r * self._io.size

2018

if self._inline:

2029

if self._inline:

2019

transaction.add(self.indexfile, end + isize, r)

2030

transaction.add(self.indexfile, end + isize, r)

2020

dfh = None

2031

dfh = None

2021

else:

2032

else:

2022

transaction.add(self.indexfile, isize, r)

2033

transaction.add(self.indexfile, isize, r)

2023

transaction.add(self.datafile, end)

2034

transaction.add(self.datafile, end)

2024

dfh = self.opener(self.datafile, "a+")

2035

dfh = self.opener(self.datafile, "a+")

2025

def flush():

2036

def flush():

2026

if dfh:

2037

if dfh:

2027

dfh.flush()

2038

dfh.flush()

2028

ifh.flush()

2039

ifh.flush()

2029

try:

2040

try:

2030

# loop through our set of deltas

2041

# loop through our set of deltas

2031

for data in deltas:

2042

for data in deltas:

2032

node, p1, p2, linknode, deltabase, delta, flags = data

2043

node, p1, p2, linknode, deltabase, delta, flags = data

2033

link = linkmapper(linknode)

2044

link = linkmapper(linknode)

2034

flags = flags or REVIDX_DEFAULT_FLAGS

2045

flags = flags or REVIDX_DEFAULT_FLAGS

2035

2046

2036

nodes.append(node)

2047

nodes.append(node)

2037

2048

2038

if node in self.nodemap:

2049

if node in self.nodemap:

2039

# this can happen if two branches make the same change

2050

# this can happen if two branches make the same change

2040

continue

2051

continue

2041

2052

2042

for p in (p1, p2):

2053

for p in (p1, p2):

2043

if p not in self.nodemap:

2054

if p not in self.nodemap:

2044

raise LookupError(p, self.indexfile,

2055

raise LookupError(p, self.indexfile,

2045

_('unknown parent'))

2056

_('unknown parent'))

2046

2057

2047

if deltabase not in self.nodemap:

2058

if deltabase not in self.nodemap:

2048

raise LookupError(deltabase, self.indexfile,

2059

raise LookupError(deltabase, self.indexfile,

2049

_('unknown delta base'))

2060

_('unknown delta base'))

2050

2061

2051

baserev = self.rev(deltabase)

2062

baserev = self.rev(deltabase)

2052

2063

2053

if baserev != nullrev and self.iscensored(baserev):

2064

if baserev != nullrev and self.iscensored(baserev):

2054

# if base is censored, delta must be full replacement in a

2065

# if base is censored, delta must be full replacement in a

2055

# single patch operation

2066

# single patch operation

2056

hlen = struct.calcsize(">lll")

2067

hlen = struct.calcsize(">lll")

2057

oldlen = self.rawsize(baserev)

2068

oldlen = self.rawsize(baserev)

2058

newlen = len(delta) - hlen

2069

newlen = len(delta) - hlen

2059

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

2070

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

2060

raise error.CensoredBaseError(self.indexfile,

2071

raise error.CensoredBaseError(self.indexfile,

2061

self.node(baserev))

2072

self.node(baserev))

2062

2073

2063

if not flags and self._peek_iscensored(baserev, delta, flush):

2074

if not flags and self._peek_iscensored(baserev, delta, flush):

2064

flags |= REVIDX_ISCENSORED

2075

flags |= REVIDX_ISCENSORED

2065

2076

2066

# We assume consumers of addrevisioncb will want to retrieve

2077

# We assume consumers of addrevisioncb will want to retrieve

2067

# the added revision, which will require a call to

2078

# the added revision, which will require a call to

2068

# revision(). revision() will fast path if there is a cache

2079

# revision(). revision() will fast path if there is a cache

2069

# hit. So, we tell _addrevision() to always cache in this case.

2080

# hit. So, we tell _addrevision() to always cache in this case.

2070

# We're only using addgroup() in the context of changegroup

2081

# We're only using addgroup() in the context of changegroup

2071

# generation so the revision data can always be handled as raw

2082

# generation so the revision data can always be handled as raw

2072

# by the flagprocessor.

2083

# by the flagprocessor.

2073

self._addrevision(node, None, transaction, link,

2084

self._addrevision(node, None, transaction, link,

2074

p1, p2, flags, (baserev, delta),

2085

p1, p2, flags, (baserev, delta),

2075

ifh, dfh,

2086

ifh, dfh,

2076

alwayscache=bool(addrevisioncb))

2087

alwayscache=bool(addrevisioncb))

2077

2088

2078

if addrevisioncb:

2089

if addrevisioncb:

2079

addrevisioncb(self, node)

2090

addrevisioncb(self, node)

2080

2091

2081

if not dfh and not self._inline:

2092

if not dfh and not self._inline:

2082

# addrevision switched from inline to conventional

2093

# addrevision switched from inline to conventional

2083

# reopen the index

2094

# reopen the index

2084

ifh.close()

2095

ifh.close()

2085

dfh = self.opener(self.datafile, "a+")

2096

dfh = self.opener(self.datafile, "a+")

2086

ifh = self.opener(self.indexfile, "a+",

2097

ifh = self.opener(self.indexfile, "a+",

2087

checkambig=self._checkambig)

2098

checkambig=self._checkambig)

2088

finally:

2099

finally:

2089

if dfh:

2100

if dfh:

2090

dfh.close()

2101

dfh.close()

2091

ifh.close()

2102

ifh.close()

2092

2103

2093

return nodes

2104

return nodes

2094

2105

2095

def iscensored(self, rev):

2106

def iscensored(self, rev):

2096

"""Check if a file revision is censored."""

2107

"""Check if a file revision is censored."""

2097

return False

2108

return False

2098

2109

2099

def _peek_iscensored(self, baserev, delta, flush):

2110

def _peek_iscensored(self, baserev, delta, flush):

2100

"""Quickly check if a delta produces a censored revision."""

2111

"""Quickly check if a delta produces a censored revision."""

2101

return False

2112

return False

2102

2113

2103

def getstrippoint(self, minlink):

2114

def getstrippoint(self, minlink):

2104

"""find the minimum rev that must be stripped to strip the linkrev

2115

"""find the minimum rev that must be stripped to strip the linkrev

2105

2116

2106

Returns a tuple containing the minimum rev and a set of all revs that

2117

Returns a tuple containing the minimum rev and a set of all revs that

2107

have linkrevs that will be broken by this strip.

2118

have linkrevs that will be broken by this strip.

2108

"""

2119

"""

2109

brokenrevs = set()

2120

brokenrevs = set()

2110

strippoint = len(self)

2121

strippoint = len(self)

2111

2122

2112

heads = {}

2123

heads = {}

2113

futurelargelinkrevs = set()

2124

futurelargelinkrevs = set()

2114

for head in self.headrevs():

2125

for head in self.headrevs():

2115

headlinkrev = self.linkrev(head)

2126

headlinkrev = self.linkrev(head)

2116

heads[head] = headlinkrev

2127

heads[head] = headlinkrev

2117

if headlinkrev >= minlink:

2128

if headlinkrev >= minlink:

2118

futurelargelinkrevs.add(headlinkrev)

2129

futurelargelinkrevs.add(headlinkrev)

2119

2130

2120

# This algorithm involves walking down the rev graph, starting at the

2131

# This algorithm involves walking down the rev graph, starting at the

2121

# heads. Since the revs are topologically sorted according to linkrev,

2132

# heads. Since the revs are topologically sorted according to linkrev,

2122

# once all head linkrevs are below the minlink, we know there are

2133

# once all head linkrevs are below the minlink, we know there are

2123

# no more revs that could have a linkrev greater than minlink.

2134

# no more revs that could have a linkrev greater than minlink.

2124

# So we can stop walking.

2135

# So we can stop walking.

2125

while futurelargelinkrevs:

2136

while futurelargelinkrevs:

2126

strippoint -= 1

2137

strippoint -= 1

2127

linkrev = heads.pop(strippoint)

2138

linkrev = heads.pop(strippoint)

2128

2139

2129

if linkrev < minlink:

2140

if linkrev < minlink:

2130

brokenrevs.add(strippoint)

2141

brokenrevs.add(strippoint)

2131

else:

2142

else:

2132

futurelargelinkrevs.remove(linkrev)

2143

futurelargelinkrevs.remove(linkrev)

2133

2144

2134

for p in self.parentrevs(strippoint):

2145

for p in self.parentrevs(strippoint):

2135

if p != nullrev:

2146

if p != nullrev:

2136

plinkrev = self.linkrev(p)

2147

plinkrev = self.linkrev(p)

2137

heads[p] = plinkrev

2148

heads[p] = plinkrev

2138

if plinkrev >= minlink:

2149

if plinkrev >= minlink:

2139

futurelargelinkrevs.add(plinkrev)

2150

futurelargelinkrevs.add(plinkrev)

2140

2151

2141

return strippoint, brokenrevs

2152

return strippoint, brokenrevs

2142

2153

2143

def strip(self, minlink, transaction):

2154

def strip(self, minlink, transaction):

2144

"""truncate the revlog on the first revision with a linkrev >= minlink

2155

"""truncate the revlog on the first revision with a linkrev >= minlink

2145

2156

2146

This function is called when we're stripping revision minlink and

2157

This function is called when we're stripping revision minlink and

2147

its descendants from the repository.

2158

its descendants from the repository.

2148

2159

2149

We have to remove all revisions with linkrev >= minlink, because

2160

We have to remove all revisions with linkrev >= minlink, because

2150

the equivalent changelog revisions will be renumbered after the

2161

the equivalent changelog revisions will be renumbered after the

2151

strip.

2162

strip.

2152

2163

2153

So we truncate the revlog on the first of these revisions, and

2164

So we truncate the revlog on the first of these revisions, and

2154

trust that the caller has saved the revisions that shouldn't be

2165

trust that the caller has saved the revisions that shouldn't be

2155

removed and that it'll re-add them after this truncation.

2166

removed and that it'll re-add them after this truncation.

2156

"""

2167

"""

2157

if len(self) == 0:

2168

if len(self) == 0:

2158

return

2169

return

2159

2170

2160

rev, _ = self.getstrippoint(minlink)

2171

rev, _ = self.getstrippoint(minlink)

2161

if rev == len(self):

2172

if rev == len(self):

2162

return

2173

return

2163

2174

2164

# first truncate the files on disk

2175

# first truncate the files on disk

2165

end = self.start(rev)

2176

end = self.start(rev)

2166

if not self._inline:

2177

if not self._inline:

2167

transaction.add(self.datafile, end)

2178

transaction.add(self.datafile, end)

2168

end = rev * self._io.size

2179

end = rev * self._io.size

2169

else:

2180

else:

2170

end += rev * self._io.size

2181

end += rev * self._io.size

2171

2182

2172

transaction.add(self.indexfile, end)

2183

transaction.add(self.indexfile, end)

2173

2184

2174

# then reset internal state in memory to forget those revisions

2185

# then reset internal state in memory to forget those revisions

2175

self._cache = None

2186

self._cache = None

2176

self._chaininfocache = {}

2187

self._chaininfocache = {}

2177

self._chunkclear()

2188

self._chunkclear()

2178

for x in xrange(rev, len(self)):

2189

for x in xrange(rev, len(self)):

2179

del self.nodemap[self.node(x)]

2190

del self.nodemap[self.node(x)]

2180

2191

2181

del self.index[rev:-1]

2192

del self.index[rev:-1]

2182

2193

2183

def checksize(self):

2194

def checksize(self):

2184

expected = 0

2195

expected = 0

2185

if len(self):

2196

if len(self):

2186

expected = max(0, self.end(len(self) - 1))

2197

expected = max(0, self.end(len(self) - 1))

2187

2198

2188

try:

2199

try:

2189

f = self.opener(self.datafile)

2200

f = self.opener(self.datafile)

2190

f.seek(0, 2)

2201

f.seek(0, 2)

2191

actual = f.tell()

2202

actual = f.tell()

2192

f.close()

2203

f.close()

2193

dd = actual - expected

2204

dd = actual - expected

2194

except IOError as inst:

2205

except IOError as inst:

2195

if inst.errno != errno.ENOENT:

2206

if inst.errno != errno.ENOENT:

2196

raise

2207

raise

2197

dd = 0

2208

dd = 0

2198

2209

2199

try:

2210

try:

2200

f = self.opener(self.indexfile)

2211

f = self.opener(self.indexfile)

2201

f.seek(0, 2)

2212

f.seek(0, 2)

2202

actual = f.tell()

2213

actual = f.tell()

2203

f.close()

2214

f.close()

2204

s = self._io.size

2215

s = self._io.size

2205

i = max(0, actual // s)

2216

i = max(0, actual // s)

2206

di = actual - (i * s)

2217

di = actual - (i * s)

2207

if self._inline:

2218

if self._inline:

2208

databytes = 0

2219

databytes = 0

2209

for r in self:

2220

for r in self:

2210

databytes += max(0, self.length(r))

2221

databytes += max(0, self.length(r))

2211

dd = 0

2222

dd = 0

2212

di = actual - len(self) * s - databytes

2223

di = actual - len(self) * s - databytes

2213

except IOError as inst:

2224

except IOError as inst:

2214

if inst.errno != errno.ENOENT:

2225

if inst.errno != errno.ENOENT:

2215

raise

2226

raise

2216

di = 0

2227

di = 0

2217

2228

2218

return (dd, di)

2229

return (dd, di)

2219

2230

2220

def files(self):

2231

def files(self):

2221

res = [self.indexfile]

2232

res = [self.indexfile]

2222

if not self._inline:

2233

if not self._inline:

2223

res.append(self.datafile)

2234

res.append(self.datafile)

2224

return res

2235

return res

2225

2236

2226

DELTAREUSEALWAYS = 'always'

2237

DELTAREUSEALWAYS = 'always'

2227

DELTAREUSESAMEREVS = 'samerevs'

2238

DELTAREUSESAMEREVS = 'samerevs'

2228

DELTAREUSENEVER = 'never'

2239

DELTAREUSENEVER = 'never'

2229

2240

2230

DELTAREUSEALL = {'always', 'samerevs', 'never'}

2241

DELTAREUSEALL = {'always', 'samerevs', 'never'}

2231

2242

2232

def clone(self, tr, destrevlog, addrevisioncb=None,

2243

def clone(self, tr, destrevlog, addrevisioncb=None,

2233

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

2244

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

2234

"""Copy this revlog to another, possibly with format changes.

2245

"""Copy this revlog to another, possibly with format changes.

2235

2246

2236

The destination revlog will contain the same revisions and nodes.

2247

The destination revlog will contain the same revisions and nodes.

2237

However, it may not be bit-for-bit identical due to e.g. delta encoding

2248

However, it may not be bit-for-bit identical due to e.g. delta encoding

2238

differences.

2249

differences.

2239

2250

2240

The ``deltareuse`` argument control how deltas from the existing revlog

2251

The ``deltareuse`` argument control how deltas from the existing revlog

2241

are preserved in the destination revlog. The argument can have the

2252

are preserved in the destination revlog. The argument can have the

2242

following values:

2253

following values:

2243

2254

2244

DELTAREUSEALWAYS

2255

DELTAREUSEALWAYS

2245

Deltas will always be reused (if possible), even if the destination

2256

Deltas will always be reused (if possible), even if the destination

2246

revlog would not select the same revisions for the delta. This is the

2257

revlog would not select the same revisions for the delta. This is the

2247

fastest mode of operation.

2258

fastest mode of operation.

2248

DELTAREUSESAMEREVS

2259

DELTAREUSESAMEREVS

2249

Deltas will be reused if the destination revlog would pick the same

2260

Deltas will be reused if the destination revlog would pick the same

2250

revisions for the delta. This mode strikes a balance between speed

2261

revisions for the delta. This mode strikes a balance between speed

2251

and optimization.

2262

and optimization.

2252

DELTAREUSENEVER

2263

DELTAREUSENEVER

2253

Deltas will never be reused. This is the slowest mode of execution.

2264

Deltas will never be reused. This is the slowest mode of execution.

2254

This mode can be used to recompute deltas (e.g. if the diff/delta

2265

This mode can be used to recompute deltas (e.g. if the diff/delta

2255

algorithm changes).

2266

algorithm changes).

2256

2267

2257

Delta computation can be slow, so the choice of delta reuse policy can

2268

Delta computation can be slow, so the choice of delta reuse policy can

2258

significantly affect run time.

2269

significantly affect run time.

2259

2270

2260

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2271

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2261

two extremes. Deltas will be reused if they are appropriate. But if the

2272

two extremes. Deltas will be reused if they are appropriate. But if the

2262

delta could choose a better revision, it will do so. This means if you

2273

delta could choose a better revision, it will do so. This means if you

2263

are converting a non-generaldelta revlog to a generaldelta revlog,

2274

are converting a non-generaldelta revlog to a generaldelta revlog,

2264

deltas will be recomputed if the delta's parent isn't a parent of the

2275

deltas will be recomputed if the delta's parent isn't a parent of the

2265

revision.

2276

revision.

2266

2277

2267

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2278

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2268

controls whether to compute deltas against both parents for merges.

2279

controls whether to compute deltas against both parents for merges.

2269

By default, the current default is used.

2280

By default, the current default is used.

2270

"""

2281

"""

2271

if deltareuse not in self.DELTAREUSEALL:

2282

if deltareuse not in self.DELTAREUSEALL:

2272

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2283

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2273

2284

2274

if len(destrevlog):

2285

if len(destrevlog):

2275

raise ValueError(_('destination revlog is not empty'))

2286

raise ValueError(_('destination revlog is not empty'))

2276

2287

2277

if getattr(self, 'filteredrevs', None):

2288

if getattr(self, 'filteredrevs', None):

2278

raise ValueError(_('source revlog has filtered revisions'))

2289

raise ValueError(_('source revlog has filtered revisions'))

2279

if getattr(destrevlog, 'filteredrevs', None):

2290

if getattr(destrevlog, 'filteredrevs', None):

2280

raise ValueError(_('destination revlog has filtered revisions'))

2291

raise ValueError(_('destination revlog has filtered revisions'))

2281

2292

2282

# lazydeltabase controls whether to reuse a cached delta, if possible.

2293

# lazydeltabase controls whether to reuse a cached delta, if possible.

2283

oldlazydeltabase = destrevlog._lazydeltabase

2294

oldlazydeltabase = destrevlog._lazydeltabase

2284

oldamd = destrevlog._aggressivemergedeltas

2295

oldamd = destrevlog._aggressivemergedeltas

2285

2296

2286

try:

2297

try:

2287

if deltareuse == self.DELTAREUSEALWAYS:

2298

if deltareuse == self.DELTAREUSEALWAYS:

2288

destrevlog._lazydeltabase = True

2299

destrevlog._lazydeltabase = True

2289

elif deltareuse == self.DELTAREUSESAMEREVS:

2300

elif deltareuse == self.DELTAREUSESAMEREVS:

2290

destrevlog._lazydeltabase = False

2301

destrevlog._lazydeltabase = False

2291

2302

2292

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2303

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2293

2304

2294

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2305

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2295

self.DELTAREUSESAMEREVS)

2306

self.DELTAREUSESAMEREVS)

2296

2307

2297

index = self.index

2308

index = self.index

2298

for rev in self:

2309

for rev in self:

2299

entry = index[rev]

2310

entry = index[rev]

2300

2311

2301

# Some classes override linkrev to take filtered revs into

2312

# Some classes override linkrev to take filtered revs into

2302

# account. Use raw entry from index.

2313

# account. Use raw entry from index.

2303

flags = entry[0] & 0xffff

2314

flags = entry[0] & 0xffff

2304

linkrev = entry[4]

2315

linkrev = entry[4]

2305

p1 = index[entry[5]][7]

2316

p1 = index[entry[5]][7]

2306

p2 = index[entry[6]][7]

2317

p2 = index[entry[6]][7]

2307

node = entry[7]

2318

node = entry[7]

2308

2319

2309

# (Possibly) reuse the delta from the revlog if allowed and

2320

# (Possibly) reuse the delta from the revlog if allowed and

2310

# the revlog chunk is a delta.

2321

# the revlog chunk is a delta.

2311

cachedelta = None

2322

cachedelta = None

2312

rawtext = None

2323

rawtext = None

2313

if populatecachedelta:

2324

if populatecachedelta:

2314

dp = self.deltaparent(rev)

2325

dp = self.deltaparent(rev)

2315

if dp != nullrev:

2326

if dp != nullrev:

2316

cachedelta = (dp, str(self._chunk(rev)))

2327

cachedelta = (dp, str(self._chunk(rev)))

2317

2328

2318

if not cachedelta:

2329

if not cachedelta:

2319

rawtext = self.revision(rev, raw=True)

2330

rawtext = self.revision(rev, raw=True)

2320

2331

2321

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2332

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2322

checkambig=False)

2333

checkambig=False)

2323

dfh = None

2334

dfh = None

2324

if not destrevlog._inline:

2335

if not destrevlog._inline:

2325

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2336

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2326

try:

2337

try:

2327

destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,

2338

destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,

2328

flags, cachedelta, ifh, dfh)

2339

flags, cachedelta, ifh, dfh)

2329

finally:

2340

finally:

2330

if dfh:

2341

if dfh:

2331

dfh.close()

2342

dfh.close()

2332

ifh.close()

2343

ifh.close()

2333

2344

2334

if addrevisioncb:

2345

if addrevisioncb:

2335

addrevisioncb(self, rev, node)

2346

addrevisioncb(self, rev, node)

2336

finally:

2347

finally:

2337

destrevlog._lazydeltabase = oldlazydeltabase

2348

destrevlog._lazydeltabase = oldlazydeltabase

2338

destrevlog._aggressivemergedeltas = oldamd

2349

destrevlog._aggressivemergedeltas = oldamd

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlog.py - storage back-end for mercurial
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import binascii
             import collections
             import errno
             import hashlib
+            import heapq
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullid,
                 nullrev,
                 wdirhex,
                 wdirid,
                 wdirrev,
             )
             from .i18n import _
             from . import (
                 ancestor,
                 error,
                 mdiff,
                 policy,
                 pycompat,
                 templatefilters,
                 util,
             )
             parsers = policy.importmod(r'parsers')
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # revlog header flags
             REVLOGV0 = 0
             REVLOGV1 = 1
             # Dummy value until file format is finalized.
             # Reminder: change the bounds check in revlog.__init__ when this is changed.
             REVLOGV2 = 0xDEAD
             FLAG_INLINE_DATA = (1 << 16)
             FLAG_GENERALDELTA = (1 << 17)
             REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
             REVLOG_DEFAULT_FORMAT = REVLOGV1
             REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
             REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
             REVLOGV2_FLAGS = REVLOGV1_FLAGS
             # revlog index flags
             REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
             REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
             REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
             REVIDX_DEFAULT_FLAGS = 0
             # stable order in which flags need to be processed and their processors applied
             REVIDX_FLAGS_ORDER = [
                 REVIDX_ISCENSORED,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
             ]
             REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
             # max size of revlog with inline data
             _maxinline = 131072
             _chunksize = 1048576
             RevlogError = error.RevlogError
             LookupError = error.LookupError
             CensoredNodeError = error.CensoredNodeError
             ProgrammingError = error.ProgrammingError
             # Store flag processors (cf. 'addflagprocessor()' to register)
             _flagprocessors = {
                 REVIDX_ISCENSORED: None,
             }
             def addflagprocessor(flag, processor):
                 """Register a flag processor on a revision data flag.
                 Invariant:
                 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
                 - Only one flag processor can be registered on a specific flag.
                 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
                   following signatures:
                       - (read)  f(self, rawtext) -> text, bool
                       - (write) f(self, text) -> rawtext, bool
                       - (raw)   f(self, rawtext) -> bool
                   "text" is presented to the user. "rawtext" is stored in revlog data, not
                   directly visible to the user.
                   The boolean returned by these transforms is used to determine whether
                   the returned text can be used for hash integrity checking. For example,
                   if "write" returns False, then "text" is used to generate hash. If
                   "write" returns True, that basically means "rawtext" returned by "write"
                   should be used to generate hash. Usually, "write" and "read" return
                   different booleans. And "raw" returns a same boolean as "write".
                   Note: The 'raw' transform is used for changegroup generation and in some
                   debug commands. In this case the transform only indicates whether the
                   contents can be used for hash integrity checks.
                 """
                 if not flag & REVIDX_KNOWN_FLAGS:
                     msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
                     raise ProgrammingError(msg)
                 if flag not in REVIDX_FLAGS_ORDER:
                     msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
                     raise ProgrammingError(msg)
                 if flag in _flagprocessors:
                     msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
                     raise error.Abort(msg)
                 _flagprocessors[flag] = processor
             def getoffset(q):
                 return int(q >> 16)
             def gettype(q):
                 return int(q & 0xFFFF)
             def offset_type(offset, type):
                 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
                     raise ValueError('unknown revlog index flags')
                 return int(int(offset) << 16 | type)
             _nullhash = hashlib.sha1(nullid)
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 # As of now, if one of the parent node is null, p2 is null
                 if p2 == nullid:
                     # deep copy of a hash is faster than creating one
                     s = _nullhash.copy()
                     s.update(p1)
                 else:
                     # none of the parent nodes are nullid
                     if p1 < p2:
                         a = p1
                         b = p2
                     else:
                         a = p2
                         b = p1
                     s = hashlib.sha1(a)
                     s.update(b)
                 s.update(text)
                 return s.digest()
             def _slicechunk(revlog, revs):
                 """slice revs to reduce the amount of unrelated data to be read from disk.
                 ``revs`` is sliced into groups that should be read in one time.
                 Assume that revs are sorted.
                 """
                 start = revlog.start
                 length = revlog.length
-                chunkqueue = collections.deque()
+                if len(revs) <= 1:
-                chunkqueue.append((revs, 0))
+                    yield revs
+                    return
-                while chunkqueue:
-                    revs, depth = chunkqueue.popleft()
                 startbyte = start(revs[0])
                 endbyte = start(revs[-1]) + length(revs[-1])
-                    deltachainspan = endbyte - startbyte
+                readdata = deltachainspan = endbyte - startbyte
-                    if deltachainspan <= revlog._srminblocksize or len(revs) <= 1:
+                chainpayload = sum(length(r) for r in revs)
-                        yield revs
-                        continue
-                    # Find where is the largest hole (this is where we would split) and
+                if deltachainspan:
-                    # sum up the lengths of useful data to compute the density of the span
+                    density = chainpayload / float(deltachainspan)
-                    textlen = 0
+                else:
+                    density = 1.0
+                # Store the gaps in a heap to have them sorted by decreasing size
+                gapsheap = []
+                heapq.heapify(gapsheap)
                 prevend = None
-                    largesthole = 0
-                    idxlargesthole = -1
                 for i, rev in enumerate(revs):
                     revstart = start(rev)
                     revlen = length(rev)
                     if prevend is not None:
-                            hole = revstart - prevend
+                        gapsize = revstart - prevend
-                            if hole > largesthole:
+                        if gapsize:
-                                largesthole = hole
+                            heapq.heappush(gapsheap, (-gapsize, i))
-                                idxlargesthole = i
-                        textlen += revlen
                     prevend = revstart + revlen
-                    density = textlen / float(deltachainspan) if deltachainspan > 0 else 1.0
+                # Collect the indices of the largest holes until the density is acceptable
+                indicesheap = []
+                heapq.heapify(indicesheap)
+                while gapsheap and density < revlog._srdensitythreshold:
+                    oppgapsize, gapidx = heapq.heappop(gapsheap)
+                    heapq.heappush(indicesheap, gapidx)
-                    if density > revlog._srdensitythreshold:
+                    # the gap sizes are stored as negatives to be sorted decreasingly
-                        yield revs
+                    # by the heap
-                        continue
+                    readdata -= (-oppgapsize)
+                    if readdata > 0:
+                        density = chainpayload / float(readdata)
+                    else:
+                        density = 1.0
-                    # Add the left and right parts so that they will be sliced
+                # Cut the revs at collected indices
-                    # recursively too
+                previdx = 0
-                    chunkqueue.append((revs[:idxlargesthole], depth + 1))
+                while indicesheap:
-                    chunkqueue.append((revs[idxlargesthole:], depth + 1))
+                    idx = heapq.heappop(indicesheap)
+                    yield revs[previdx:idx]
+                    previdx = idx
+                yield revs[previdx:]
             # index v0:
             #  4 bytes: offset
             #  4 bytes: compressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             # 20 bytes: parent 1 nodeid
             # 20 bytes: parent 2 nodeid
             # 20 bytes: nodeid
             indexformatv0 = struct.Struct(">4l20s20s20s")
             indexformatv0_pack = indexformatv0.pack
             indexformatv0_unpack = indexformatv0.unpack
             class revlogoldio(object):
                 def __init__(self):
                     self.size = indexformatv0.size
                 def parseindex(self, data, inline):
                     s = self.size
                     index = []
                     nodemap = {nullid: nullrev}
                     n = off = 0
                     l = len(data)
                     while off + s <= l:
                         cur = data[off:off + s]
                         off += s
                         e = indexformatv0_unpack(cur)
                         # transform to revlogv1 format
                         e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                               nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
                         index.append(e2)
                         nodemap[e[6]] = n
                         n += 1
                     # add the magic null revision at -1
                     index.append((0, 0, 0, -1, -1, -1, -1, nullid))
                     return index, nodemap, None
                 def packentry(self, entry, node, version, rev):
                     if gettype(entry[0]):
                         raise RevlogError(_('index entry flags need revlog version 1'))
                     e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                           node(entry[5]), node(entry[6]), entry[7])
                     return indexformatv0_pack(*e2)
             # index ng:
             #  6 bytes: offset
             #  2 bytes: flags
             #  4 bytes: compressed length
             #  4 bytes: uncompressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             #  4 bytes: parent 1 rev
             #  4 bytes: parent 2 rev
             # 32 bytes: nodeid
             indexformatng = struct.Struct(">Qiiiiii20s12x")
             indexformatng_pack = indexformatng.pack
             versionformat = struct.Struct(">I")
             versionformat_pack = versionformat.pack
             versionformat_unpack = versionformat.unpack
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7fffffff
             class revlogio(object):
                 def __init__(self):
                     self.size = indexformatng.size
                 def parseindex(self, data, inline):
                     # call the C implementation to parse the index data
                     index, cache = parsers.parse_index2(data, inline)
                     return index, getattr(index, 'nodemap', None), cache
                 def packentry(self, entry, node, version, rev):
                     p = indexformatng_pack(*entry)
                     if rev == 0:
                         p = versionformat_pack(version) + p[4:]
                     return p
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 If mmaplargeindex is True, and an mmapindexthreshold is set, the
                 index will be mmapped rather than read if it is larger than the
                 configured threshold.
                 """
                 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                              mmaplargeindex=False):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = datafile or (indexfile[:-2] + ".d")
                     self.opener = opener
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._cache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, '')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._aggressivemergedeltas = False
                     self.index = []
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Mapping of revision integer to full node.
                     self._nodecache = {nullid: nullrev}
                     self._nodepos = None
                     self._compengine = 'zlib'
                     self._maxdeltachainspan = -1
                     self._withsparseread = False
                     self._srdensitythreshold = 0.25
                     self._srminblocksize = 262144
                     mmapindexthreshold = None
                     v = REVLOG_DEFAULT_VERSION
                     opts = getattr(opener, 'options', None)
                     if opts is not None:
                         if 'revlogv2' in opts:
                             # version 2 revlogs always use generaldelta.
                             v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
                         elif 'revlogv1' in opts:
                             if 'generaldelta' in opts:
                                 v |= FLAG_GENERALDELTA
                         else:
                             v = 0
                         if 'chunkcachesize' in opts:
                             self._chunkcachesize = opts['chunkcachesize']
                         if 'maxchainlen' in opts:
                             self._maxchainlen = opts['maxchainlen']
                         if 'aggressivemergedeltas' in opts:
                             self._aggressivemergedeltas = opts['aggressivemergedeltas']
                         self._lazydeltabase = bool(opts.get('lazydeltabase', False))
                         if 'compengine' in opts:
                             self._compengine = opts['compengine']
                         if 'maxdeltachainspan' in opts:
                             self._maxdeltachainspan = opts['maxdeltachainspan']
                         if mmaplargeindex and 'mmapindexthreshold' in opts:
                             mmapindexthreshold = opts['mmapindexthreshold']
                         self._withsparseread = bool(opts.get('with-sparse-read', False))
                         if 'sparse-read-density-threshold' in opts:
                             self._srdensitythreshold = opts['sparse-read-density-threshold']
                         if 'sparse-read-min-block-size' in opts:
                             self._srminblocksize = opts['sparse-read-min-block-size']
                     if self._chunkcachesize <= 0:
                         raise RevlogError(_('revlog chunk cache size %r is not greater '
                                             'than 0') % self._chunkcachesize)
                     elif self._chunkcachesize & (self._chunkcachesize - 1):
                         raise RevlogError(_('revlog chunk cache size %r is not a power '
                                             'of 2') % self._chunkcachesize)
                     indexdata = ''
                     self._initempty = True
                     try:
                         f = self.opener(self.indexfile)
                         if (mmapindexthreshold is not None and
                                 self.opener.fstat(f).st_size >= mmapindexthreshold):
                             indexdata = util.buffer(util.mmapread(f))
                         else:
                             indexdata = f.read()
                         f.close()
                         if len(indexdata) > 0:
                             v = versionformat_unpack(indexdata[:4])[0]
                             self._initempty = False
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                     self.version = v
                     self._inline = v & FLAG_INLINE_DATA
                     self._generaldelta = v & FLAG_GENERALDELTA
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == REVLOGV0:
                         if flags:
                             raise RevlogError(_('unknown flags (%#04x) in version %d '
                                                 'revlog %s') %
                                               (flags >> 16, fmt, self.indexfile))
                     elif fmt == REVLOGV1:
                         if flags & ~REVLOGV1_FLAGS:
                             raise RevlogError(_('unknown flags (%#04x) in version %d '
                                                 'revlog %s') %
                                               (flags >> 16, fmt, self.indexfile))
                     elif fmt == REVLOGV2:
                         if flags & ~REVLOGV2_FLAGS:
                             raise RevlogError(_('unknown flags (%#04x) in version %d '
                                                 'revlog %s') %
                                               (flags >> 16, fmt, self.indexfile))
                     else:
                         raise RevlogError(_('unknown version (%d) in revlog %s') %
                                           (fmt, self.indexfile))
                     self.storedeltachains = True
                     self._io = revlogio()
                     if self.version == REVLOGV0:
                         self._io = revlogoldio()
                     try:
                         d = self._io.parseindex(indexdata, self._inline)
                     except (ValueError, IndexError):
                         raise RevlogError(_("index %s is corrupted") % (self.indexfile))
                     self.index, nodemap, self._chunkcache = d
                     if nodemap is not None:
                         self.nodemap = self._nodecache = nodemap
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = {}
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def _compressor(self):
                     return util.compengines[self._compengine].revlogcompressor()
                 def tip(self):
                     return self.node(len(self.index) - 2)
                 def __contains__(self, rev):
                     return 0 <= rev < len(self)
                 def __len__(self):
                     return len(self.index) - 1
                 def __iter__(self):
                     return iter(xrange(len(self)))
                 def revs(self, start=0, stop=None):
                     """iterate over all rev in this revlog (from start to stop)"""
                     step = 1
                     if stop is not None:
                         if start > stop:
                             step = -1
                         stop += step
                     else:
                         stop = len(self)
                     return xrange(start, stop, step)
                 @util.propertycache
                 def nodemap(self):
                     self.rev(self.node(0))
                     return self._nodecache
                 def hasnode(self, node):
                     try:
                         self.rev(node)
                         return True
                     except KeyError:
                         return False
                 def clearcaches(self):
                     self._cache = None
                     self._chainbasecache.clear()
                     self._chunkcache = (0, '')
                     self._pcache = {}
                     try:
                         self._nodecache.clearcaches()
                     except AttributeError:
                         self._nodecache = {nullid: nullrev}
                         self._nodepos = None
                 def rev(self, node):
                     try:
                         return self._nodecache[node]
                     except TypeError:
                         raise
                     except RevlogError:
                         # parsers.c radix tree lookup failed
                         if node == wdirid:
                             raise error.WdirUnsupported
                         raise LookupError(node, self.indexfile, _('no node'))
                     except KeyError:
                         # pure python cache lookup failed
                         n = self._nodecache
                         i = self.index
                         p = self._nodepos
                         if p is None:
                             p = len(i) - 2
                         for r in xrange(p, -1, -1):
                             v = i[r][7]
                             n[v] = r
                             if v == node:
                                 self._nodepos = r - 1
                                 return r
                         if node == wdirid:
                             raise error.WdirUnsupported
                         raise LookupError(node, self.indexfile, _('no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     return int(self.index[rev][0] >> 16)
                 def flags(self, rev):
                     return self.index[rev][0] & 0xFFFF
                 def length(self, rev):
                     return self.index[rev][1]
                 def rawsize(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     l = self.index[rev][2]
                     if l >= 0:
                         return l
                     t = self.revision(rev, raw=True)
                     return len(t)
                 def size(self, rev):
                     """length of non-raw text (processed by a "read" flag processor)"""
                     # fast path: if no "read" flag processor could change the content,
                     # size is rawsize. note: ELLIPSIS is known to not change the content.
                     flags = self.flags(rev)
                     if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
                         return self.rawsize(rev)
                     return len(self.revision(rev, raw=False))
                 def chainbase(self, rev):
                     base = self._chainbasecache.get(rev)
                     if base is not None:
                         return base
                     index = self.index
                     base = index[rev][3]
                     while base != rev:
                         rev = base
                         base = index[rev][3]
                     self._chainbasecache[rev] = base
                     return base
                 def linkrev(self, rev):
                     return self.index[rev][4]
                 def parentrevs(self, rev):
                     try:
                         return self.index[rev][5:7]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                 def node(self, rev):
                     try:
                         return self.index[rev][7]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                 # Derived from index values.
                 def end(self, rev):
                     return self.start(rev) + self.length(rev)
                 def parents(self, node):
                     i = self.index
                     d = i[self.rev(node)]
                     return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
                 def chainlen(self, rev):
                     return self._chaininfo(rev)[0]
                 def _chaininfo(self, rev):
                     chaininfocache = self._chaininfocache
                     if rev in chaininfocache:
                         return chaininfocache[rev]
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     clen = 0
                     compresseddeltalen = 0
                     while iterrev != e[3]:
                         clen += 1
                         compresseddeltalen += e[1]
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         if iterrev in chaininfocache:
                             t = chaininfocache[iterrev]
                             clen += t[0]
                             compresseddeltalen += t[1]
                             break
                         e = index[iterrev]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         compresseddeltalen += e[1]
                     r = (clen, compresseddeltalen)
                     chaininfocache[rev] = r
                     return r
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.
                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.
                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     # Try C implementation.
                     try:
                         return self.index.deltachain(rev, stoprev, self._generaldelta)
                     except AttributeError:
                         pass
                     chain = []
                     # Alias to prevent attribute lookup in tight loop.
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     while iterrev != e[3] and iterrev != stoprev:
                         chain.append(iterrev)
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         e = index[iterrev]
                     if iterrev == stoprev:
                         stopped = True
                     else:
                         chain.append(iterrev)
                         stopped = False
                     chain.reverse()
                     return chain, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse topological order.
                     Does not generate revs lower than stoprev.
                     See the documentation for ancestor.lazyancestors for more details."""
                     return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                                   inclusive=inclusive)
                 def descendants(self, revs):
                     """Generate the descendants of 'revs' in revision order.
                     Yield a sequence of revision numbers starting with a child of
                     some rev in revs, i.e., each revision is *not* considered a
                     descendant of itself.  Results are ordered by revision number (a
                     topological sort)."""
                     first = min(revs)
                     if first == nullrev:
                         for i in self:
                             yield i
                         return
                     seen = set(revs)
                     for i in self.revs(start=first + 1):
                         for x in self.parentrevs(i):
                             if x != nullrev and x in seen:
                                 seen.add(i)
                                 yield i
                                 break
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:
                       ::common, (::heads) - (::common)
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     # we want the ancestors, but inclusive
                     class lazyset(object):
                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues
                         def __contains__(self, value):
                             return value in self.addedvalues or value in self.lazyvalues
                         def __iter__(self):
                             added = self.addedvalues
                             for r in added:
                                 yield r
                             for r in self.lazyvalues:
                                 if not r in added:
                                     yield r
                         def add(self, value):
                             self.addedvalues.add(value)
                         def update(self, values):
                             self.addedvalues.update(values)
                     has = lazyset(self.ancestors(common))
                     has.add(nullrev)
                     has.update(common)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     visit = collections.deque(r for r in heads if r not in has)
                     while visit:
                         r = visit.popleft()
                         if r in missing:
                             continue
                         else:
                             missing.add(r)
                             for p in self.parentrevs(r):
                                 if p not in has:
                                     visit.append(p)
                     missing = list(missing)
                     missing.sort()
                     return has, [self.node(miss) for miss in missing]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common. This is an ancestor.incrementalmissingancestors
                     object.
                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     if common is None:
                         common = [nullrev]
                     return ancestor.incrementalmissingancestors(self.parentrevs, common)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.
                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullrev]
                     if heads is None:
                         heads = self.headrevs()
                     inc = self.incrementalmissingrevs(common=common)
                     return inc.missingancestors(heads)
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.
                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     inc = self.incrementalmissingrevs(common=common)
                     return [self.node(r) for r in inc.missingancestors(heads)]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.
                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:
 . N is a descendant of some node in 'roots'
 . N is an ancestor of some node in 'heads'
                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.
                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.
                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in self], [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n) # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [head for head, flag in heads.iteritems() if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self):
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         return self._headrevs()
                 def computephases(self, roots):
                     return self.index.computephasesmapsets(roots)
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     if stop is specified, it will consider all the revs from stop
                     as if they had no children
                     """
                     if start is None and stop is None:
                         if not len(self):
                             return [nullid]
                         return [self.node(r) for r in self.headrevs()]
                     if start is None:
                         start = nullid
                     if stop is None:
                         stop = []
                     stoprevs = set([self.rev(n) for n in stop])
                     startrev = self.rev(start)
                     reachable = {startrev}
                     heads = {startrev}
                     parentrevs = self.parentrevs
                     for r in self.revs(start=startrev + 1):
                         for p in parentrevs(r):
                             if p in reachable:
                                 if r not in stoprevs:
                                     reachable.add(r)
                                 heads.add(r)
                             if p in heads and p not in stoprevs:
                                 heads.remove(p)
                     return [self.node(r) for r in heads]
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in self.revs(start=p + 1):
                         prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
                         if prevs:
                             for pr in prevs:
                                 if pr == p:
                                     c.append(self.node(r))
                         elif p == nullrev:
                             c.append(self.node(r))
                     return c
                 def descendant(self, start, end):
                     if start == nullrev:
                         return True
                     for i in self.descendants([start]):
                         if i == end:
                             return True
                         elif i > end:
                             break
                     return False
                 def commonancestorsheads(self, a, b):
                     """calculate all the heads of the common ancestors of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.commonancestorsheads(a, b)
                     except (AttributeError, OverflowError): # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
                     return pycompat.maplist(self.node, ancs)
                 def isancestor(self, a, b):
                     """return True if node a is an ancestor of node b
                     The implementation of this is trivial but the use of
                     commonancestorsheads is not."""
                     return a in self.commonancestorsheads(a, b)
                 def ancestor(self, a, b):
                     """calculate the "best" common ancestor of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.ancestors(a, b)
                     except (AttributeError, OverflowError):
                         ancs = ancestor.ancestors(self.parentrevs, a, b)
                     if ancs:
                         # choose a consistent winner when there's a tie
                         return min(map(self.node, ancs))
                     return nullid
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == 20:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node) # quick search the index
                             return node
                         except LookupError:
                             pass # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if str(rev) != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 40:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (TypeError, LookupError):
                             pass
                 def _partialmatch(self, id):
                     maybewdir = wdirhex.startswith(id)
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             if maybewdir:
                                 # single 'ff...' match in radix tree, ambiguous with wdir
                                 raise RevlogError
                             return partial
                         if maybewdir:
                             # no 'ff...' match in radix tree, wdir identified
                             raise error.WdirUnsupported
                         return None
                     except RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             raise LookupError(id, self.indexfile,
                                               _('ambiguous identifier'))
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) < 40:
                         try:
                             # hex(node)[:...]
                             l = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[:l * 2])
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             nl = [n for n in nl if hex(n).startswith(id) and
                                   self.hasnode(n)]
                             if len(nl) > 0:
                                 if len(nl) == 1 and not maybewdir:
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise LookupError(id, self.indexfile,
                                                   _('ambiguous identifier'))
                             if maybewdir:
                                 raise error.WdirUnsupported
                             return None
                         except (TypeError, binascii.Error):
                             pass
                 def lookup(self, id):
                     """locate a node based on:
                         - revision number or str(revision number)
                         - nodeid or subset of hex nodeid
                     """
                     n = self._match(id)
                     if n is not None:
                         return n
                     n = self._partialmatch(id)
                     if n:
                         return n
                     raise LookupError(id, self.indexfile, _('no match found'))
                 def shortest(self, hexnode, minlength=1):
                     """Find the shortest unambiguous prefix that matches hexnode."""
                     def isvalid(test):
                         try:
                             if self._partialmatch(test) is None:
                                 return False
                             try:
                                 i = int(test)
                                 # if we are a pure int, then starting with zero will not be
                                 # confused as a rev; or, obviously, if the int is larger
                                 # than the value of the tip rev
                                 if test[0] == '0' or i > len(self):
                                     return True
                                 return False
                             except ValueError:
                                 return True
                         except error.RevlogError:
                             return False
                         except error.WdirUnsupported:
                             # single 'ff...' match
                             return True
                     shortest = hexnode
                     startlength = max(6, minlength)
                     length = startlength
                     while True:
                         test = hexnode[:length]
                         if isvalid(test):
                             shortest = test
                             if length == minlength or length > startlength:
                                 return shortest
                             length -= 1
                         else:
                             length += 1
                             if len(shortest) <= length:
                                 return shortest
                 def cmp(self, node, text):
                     """compare text with a given file revision
                     returns True if text is different than what is stored.
                     """
                     p1, p2 = self.parents(node)
                     return hash(text, p1, p2) != node
                 def _cachesegment(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _readsegment(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     Returns a str or buffer of raw byte data.
                     """
                     if df is not None:
                         closehandle = False
                     else:
                         if self._inline:
                             df = self.opener(self.indexfile)
                         else:
                             df = self.opener(self.datafile)
                         closehandle = True
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     cachesize = self._chunkcachesize
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                                   - realoffset)
                     df.seek(realoffset)
                     d = df.read(reallength)
                     if closehandle:
                         df.close()
                     self._cachesegment(realoffset, d)
                     if offset != realoffset or reallength != length:
                         return util.buffer(d, offset - realoffset, length)
                     return d
                 def _getsegment(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._readsegment(offset, length, df=df)
                 def _getsegmentforrevs(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self._io.size
                         end += (endrev + 1) * self._io.size
                     length = end - start
                     return start, self._getsegment(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Obtain a single decompressed chunk for a revision.
                     Accepts an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will not
                     be preserved.
                     Returns a str holding uncompressed data for the requested revision.
                     """
                     return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
                 def _chunks(self, revs, df=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     """
                     if not revs:
                         return []
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self._io.size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     if not self._withsparseread:
                         slicedchunks = (revs,)
                     else:
                         slicedchunks = _slicechunk(self, revs)
                     for revschunk in slicedchunks:
                         firstrev = revschunk[0]
                         # Skip trailing revisions with empty diff
                         for lastrev in revschunk[::-1]:
                             if length(lastrev) != 0:
                                 break
                         try:
                             offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
                         except OverflowError:
                             # issue4215 - we can't cache a run of chunks greater than
                             # 2G on Windows
                             return [self._chunk(rev, df=df) for rev in revschunk]
                         decomp = self.decompress
                         for rev in revschunk:
                             chunkstart = start(rev)
                             if inline:
                                 chunkstart += (rev + 1) * iosize
                             chunklength = length(rev)
                             ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, '')
                 def deltaparent(self, rev):
                     """return deltaparent of the given revision"""
                     base = self.index[rev][3]
                     if base == rev:
                         return nullrev
                     elif self._generaldelta:
                         return base
                     else:
                         return rev - 1
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions
                     The delta calculated is in binary form and is intended to be written to
                     revlog data directly. So this function needs raw revision data.
                     """
                     if rev1 != nullrev and self.deltaparent(rev2) == rev1:
                         return bytes(self._chunk(rev2))
                     return mdiff.textdiff(self.revision(rev1, raw=True),
                                           self.revision(rev2, raw=True))
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """return an uncompressed revision of a given node or revision
                     number.
                     _df - an existing file handle to read from. (internal-only)
                     raw - an optional argument specifying if the revision data is to be
                     treated as raw data when applying flag transforms. 'raw' should be set
                     to True when generating changegroups or in debug commands.
                     """
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         rev = None
                     cachedrev = None
                     flags = None
                     rawtext = None
                     if node == nullid:
                         return ""
                     if self._cache:
                         if self._cache[0] == node:
                             # _cache only stores rawtext
                             if raw:
                                 return self._cache[2]
                             # duplicated, but good for perf
                             if rev is None:
                                 rev = self.rev(node)
                             if flags is None:
                                 flags = self.flags(rev)
                             # no extra flags set, no flag processor runs, text = rawtext
                             if flags == REVIDX_DEFAULT_FLAGS:
                                 return self._cache[2]
                             # rawtext is reusable. need to run flag processor
                             rawtext = self._cache[2]
                         cachedrev = self._cache[1]
                     # look up what we need to read
                     if rawtext is None:
                         if rev is None:
                             rev = self.rev(node)
                         chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                         if stopped:
                             rawtext = self._cache[2]
                         # drop cache to save memory
                         self._cache = None
                         bins = self._chunks(chain, df=_df)
                         if rawtext is None:
                             rawtext = bytes(bins[0])
                             bins = bins[1:]
                         rawtext = mdiff.patches(rawtext, bins)
                         self._cache = (node, rev, rawtext)
                     if flags is None:
                         if rev is None:
                             rev = self.rev(node)
                         flags = self.flags(rev)
                     text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     return text
                 def hash(self, text, p1, p2):
                     """Compute a node hash.
                     Available as a function so that subclasses can replace the hash
                     as needed.
                     """
                     return hash(text, p1, p2)
                 def _processflags(self, text, flags, operation, raw=False):
                     """Inspect revision data flags and applies transforms defined by
                     registered flag processors.
                     ``text`` - the revision data to process
                     ``flags`` - the revision flags
                     ``operation`` - the operation being performed (read or write)
                     ``raw`` - an optional argument describing if the raw transform should be
                     applied.
                     This method processes the flags in the order (or reverse order if
                     ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
                     flag processors registered for present flags. The order of flags defined
                     in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
                     Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                     processed text and ``validatehash`` is a bool indicating whether the
                     returned text should be checked for hash integrity.
                     Note: If the ``raw`` argument is set, it has precedence over the
                     operation and will only update the value of ``validatehash``.
                     """
                     # fast path: no flag processors will run
                     if flags == 0:
                         return text, True
                     if not operation in ('read', 'write'):
                         raise ProgrammingError(_("invalid '%s' operation ") % (operation))
                     # Check all flags are known.
                     if flags & ~REVIDX_KNOWN_FLAGS:
                         raise RevlogError(_("incompatible revision flag '%#x'") %
                                           (flags & ~REVIDX_KNOWN_FLAGS))
                     validatehash = True
                     # Depending on the operation (read or write), the order might be
                     # reversed due to non-commutative transforms.
                     orderedflags = REVIDX_FLAGS_ORDER
                     if operation == 'write':
                         orderedflags = reversed(orderedflags)
                     for flag in orderedflags:
                         # If a flagprocessor has been registered for a known flag, apply the
                         # related operation transform and update result tuple.
                         if flag & flags:
                             vhash = True
                             if flag not in _flagprocessors:
                                 message = _("missing processor for flag '%#x'") % (flag)
                                 raise RevlogError(message)
                             processor = _flagprocessors[flag]
                             if processor is not None:
                                 readtransform, writetransform, rawtransform = processor
                                 if raw:
                                     vhash = rawtransform(self, text)
                                 elif operation == 'read':
                                     text, vhash = readtransform(self, text)
                                 else: # write operation
                                     text, vhash = writetransform(self, text)
                             validatehash = validatehash and vhash
                     return text, validatehash
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Check node hash integrity.
                     Available as a function so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     if p1 is None and p2 is None:
                         p1, p2 = self.parents(node)
                     if node != self.hash(text, p1, p2):
                         revornode = rev
                         if revornode is None:
                             revornode = templatefilters.short(hex(node))
                         raise RevlogError(_("integrity check failed on %s:%s")
                             % (self.indexfile, pycompat.bytestr(revornode)))
                 def checkinlinesize(self, tr, fp=None):
                     """Check if the revlog is too big for inline and convert if so.
                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.
                     """
                     if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
                         return
                     trinfo = tr.find(self.indexfile)
                     if trinfo is None:
                         raise RevlogError(_("%s not found in the transaction")
                                           % self.indexfile)
                     trindex = trinfo[2]
                     if trindex is not None:
                         dataoff = self.start(trindex)
                     else:
                         # revlog was stripped at start of transaction, use all leftover data
                         trindex = len(self) - 1
                         dataoff = self.end(-2)
                     tr.add(self.datafile, dataoff)
                     if fp:
                         fp.flush()
                         fp.close()
                     df = self.opener(self.datafile, 'w')
                     try:
                         for r in self:
                             df.write(self._getsegmentforrevs(r, r)[1])
                     finally:
                         df.close()
                     fp = self.opener(self.indexfile, 'w', atomictemp=True,
                                      checkambig=self._checkambig)
                     self.version &= ~FLAG_INLINE_DATA
                     self._inline = False
                     for i in self:
                         e = self._io.packentry(self.index[i], self.node, self.version, i)
                         fp.write(e)
                     # if we don't call close, the temp file will never replace the
                     # real index
                     fp.close()
                     tr.replace(self.indexfile, trindex * self._io.size)
                     self._chunkclear()
                 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                                 node=None, flags=REVIDX_DEFAULT_FLAGS):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     """
                     if link == nullrev:
                         raise RevlogError(_("attempted to add linkrev -1 to %s")
                                           % self.indexfile)
                     if flags:
                         node = node or self.hash(text, p1, p2)
                     rawtext, validatehash = self._processflags(text, flags, 'write')
                     # If the flag processor modifies the revision data, ignore any provided
                     # cachedelta.
                     if rawtext != text:
                         cachedelta = None
                     if len(rawtext) > _maxentrysize:
                         raise RevlogError(
                             _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                             % (self.indexfile, len(rawtext)))
                     node = node or self.hash(rawtext, p1, p2)
                     if node in self.nodemap:
                         return node
                     if validatehash:
                         self.checkhash(rawtext, node, p1=p1, p2=p2)
                     return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                                flags, cachedelta=cachedelta)
                 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                                    cachedelta=None):
                     """add a raw revision with known flags, node and parents
                     useful when reusing a revision not stored in this revlog (ex: received
                     over wire, or read from an external bundle).
                     """
                     dfh = None
                     if not self._inline:
                         dfh = self.opener(self.datafile, "a+")
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     try:
                         return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                                  flags, cachedelta, ifh, dfh)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data."""
                     if not data:
                         return '', data
                     compressed = self._compressor.compress(data)
                     if compressed:
                         # The revlog compressor added the header in the returned data.
                         return '', compressed
                     if data[0:1] == '\0':
                         return '', data
                     return 'u', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.
                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     """
                     if not data:
                         return data
                     # Revlogs are read much more frequently than they are written and many
                     # chunks only take microseconds to decompress, so performance is
                     # important here.
                     #
                     # We can make a few assumptions about revlogs:
                     #
                     # 1) the majority of chunks will be compressed (as opposed to inline
                     #    raw data).
                     # 2) decompressing *any* data will likely by at least 10x slower than
                     #    returning raw inline data.
                     # 3) we want to prioritize common and officially supported compression
                     #    engines
                     #
                     # It follows that we want to optimize for "decompress compressed data
                     # when encoded with common and officially supported compression engines"
                     # case over "raw data" and "data encoded by less common or non-official
                     # compression engines." That is why we have the inline lookup first
                     # followed by the compengines lookup.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest reads.
                     t = data[0:1]
                     if t == 'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise RevlogError(_('revlog decompress error: %s') % str(e))
                     # '\0' is more common than 'u' so it goes first.
                     elif t == '\0':
                         return data
                     elif t == 'u':
                         return util.buffer(data, 1)
                     try:
                         compressor = self._decompressors[t]
                     except KeyError:
                         try:
                             engine = util.compengines.forrevlogheader(t)
                             compressor = engine.revlogcompressor()
                             self._decompressors[t] = compressor
                         except KeyError:
                             raise RevlogError(_('unknown compression type %r') % t)
                     return compressor.decompress(data)
                 def _isgooddelta(self, d, textlen):
                     """Returns True if the given delta is good. Good means that it is within
                     the disk span, disk size, and chain length bounds that we know to be
                     performant."""
                     if d is None:
                         return False
                     # - 'dist' is the distance from the base revision -- bounding it limits
                     #   the amount of I/O we need to do.
                     # - 'compresseddeltalen' is the sum of the total size of deltas we need
                     #   to apply -- bounding it limits the amount of CPU we consume.
                     dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
                     defaultmax = textlen * 4
                     maxdist = self._maxdeltachainspan
                     if not maxdist:
                         maxdist = dist # ensure the conditional pass
                     maxdist = max(maxdist, defaultmax)
                     if (dist > maxdist or l > textlen or
                         compresseddeltalen > textlen * 2 or
                         (self._maxchainlen and chainlen > self._maxchainlen)):
                         return False
                     return True
                 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                                  cachedelta, ifh, dfh, alwayscache=False):
                     """internal function to add revisions to the log
                     see addrevision for argument descriptions.
                     note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
                     invariants:
                     - rawtext is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     """
                     if node == nullid:
                         raise RevlogError(_("%s: attempt to add null revision") %
                                           (self.indexfile))
                     if node == wdirid:
                         raise RevlogError(_("%s: attempt to add wdir revision") %
                                           (self.indexfile))
                     btext = [rawtext]
                     def buildtext():
                         if btext[0] is not None:
                             return btext[0]
                         baserev = cachedelta[0]
                         delta = cachedelta[1]
                         # special case deltas which replace entire base; no need to decode
                         # base revision. this neatly avoids censored bases, which throw when
                         # they're decoded.
                         hlen = struct.calcsize(">lll")
                         if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                                    len(delta) - hlen):
                             btext[0] = delta[hlen:]
                         else:
                             if self._inline:
                                 fh = ifh
                             else:
                                 fh = dfh
                             basetext = self.revision(baserev, _df=fh, raw=True)
                             btext[0] = mdiff.patch(basetext, delta)
                         try:
                             res = self._processflags(btext[0], flags, 'read', raw=True)
                             btext[0], validatehash = res
                             if validatehash:
                                 self.checkhash(btext[0], node, p1=p1, p2=p2)
                             if flags & REVIDX_ISCENSORED:
                                 raise RevlogError(_('node %s is not censored') % node)
                         except CensoredNodeError:
                             # must pass the censored index flag to add censored revisions
                             if not flags & REVIDX_ISCENSORED:
                                 raise
                         return btext[0]
                     def builddelta(rev):
                         # can we use the cached delta?
                         if cachedelta and cachedelta[0] == rev:
                             delta = cachedelta[1]
                         else:
                             t = buildtext()
                             if self.iscensored(rev):
                                 # deltas based on a censored revision must replace the
                                 # full content in one patch, so delta works everywhere
                                 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                                 delta = header + t
                             else:
                                 if self._inline:
                                     fh = ifh
                                 else:
                                     fh = dfh
                                 ptext = self.revision(rev, _df=fh, raw=True)
                                 delta = mdiff.textdiff(ptext, t)
                         header, data = self.compress(delta)
                         deltalen = len(header) + len(data)
                         chainbase = self.chainbase(rev)
                         dist = deltalen + offset - self.start(chainbase)
                         if self._generaldelta:
                             base = rev
                         else:
                             base = chainbase
                         chainlen, compresseddeltalen = self._chaininfo(rev)
                         chainlen += 1
                         compresseddeltalen += deltalen
                         return (dist, deltalen, (header, data), base,
                                 chainbase, chainlen, compresseddeltalen)
                     curr = len(self)
                     prev = curr - 1
                     offset = self.end(prev)
                     delta = None
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if rawtext is None:
                         textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                                     cachedelta[1])
                     else:
                         textlen = len(rawtext)
                     # should we try to build a delta?
                     if prev != nullrev and self.storedeltachains:
                         tested = set()
                         # This condition is true most of the time when processing
                         # changegroup data into a generaldelta repo. The only time it
                         # isn't true is if this is the first revision in a delta chain
                         # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                         if cachedelta and self._generaldelta and self._lazydeltabase:
                             # Assume what we received from the server is a good choice
                             # build delta will reuse the cache
                             candidatedelta = builddelta(cachedelta[0])
                             tested.add(cachedelta[0])
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                         if delta is None and self._generaldelta:
                             # exclude already lazy tested base if any
                             parents = [p for p in (p1r, p2r)
                                        if p != nullrev and p not in tested]
                             if parents and not self._aggressivemergedeltas:
                                 # Pick whichever parent is closer to us (to minimize the
                                 # chance of having to build a fulltext).
                                 parents = [max(parents)]
                             tested.update(parents)
                             pdeltas = []
                             for p in parents:
                                 pd = builddelta(p)
                                 if self._isgooddelta(pd, textlen):
                                     pdeltas.append(pd)
                             if pdeltas:
                                 delta = min(pdeltas, key=lambda x: x[1])
                         if delta is None and prev not in tested:
                             # other approach failed try against prev to hopefully save us a
                             # fulltext.
                             candidatedelta = builddelta(prev)
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                     if delta is not None:
                         dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
                     else:
                         rawtext = buildtext()
                         data = self.compress(rawtext)
                         l = len(data[1]) + len(data[0])
                         base = chainbase = curr
                     e = (offset_type(offset, flags), l, textlen,
                          base, link, p1r, p2r, node)
                     self.index.insert(-1, e)
                     self.nodemap[node] = curr
                     entry = self._io.packentry(e, self.node, self.version, curr)
                     self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
                     if alwayscache and rawtext is None:
                         rawtext = buildtext()
                     if type(rawtext) == str: # only accept immutable objects
                         self._cache = (node, curr, rawtext)
                     self._chainbasecache[curr] = chainbase
                     return node
                 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3.
                     ifh.seek(0, os.SEEK_END)
                     if dfh:
                         dfh.seek(0, os.SEEK_END)
                     curr = len(self) - 1
                     if not self._inline:
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         ifh.write(entry)
                     else:
                         offset += curr * self._io.size
                         transaction.add(self.indexfile, offset, curr)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         self.checkinlinesize(transaction, ifh)
                 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     If ``addrevisioncb`` is defined, it will be called with arguments of
                     this revlog and the node that was added.
                     """
                     nodes = []
                     r = len(self)
                     end = 0
                     if r:
                         end = self.end(r - 1)
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     isize = r * self._io.size
                     if self._inline:
                         transaction.add(self.indexfile, end + isize, r)
                         dfh = None
                     else:
                         transaction.add(self.indexfile, isize, r)
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a+")
                     def flush():
                         if dfh:
                             dfh.flush()
                         ifh.flush()
                     try:
                         # loop through our set of deltas
                         for data in deltas:
                             node, p1, p2, linknode, deltabase, delta, flags = data
                             link = linkmapper(linknode)
                             flags = flags or REVIDX_DEFAULT_FLAGS
                             nodes.append(node)
                             if node in self.nodemap:
                                 # this can happen if two branches make the same change
                                 continue
                             for p in (p1, p2):
                                 if p not in self.nodemap:
                                     raise LookupError(p, self.indexfile,
                                                       _('unknown parent'))
                             if deltabase not in self.nodemap:
                                 raise LookupError(deltabase, self.indexfile,
                                                   _('unknown delta base'))
                             baserev = self.rev(deltabase)
                             if baserev != nullrev and self.iscensored(baserev):
                                 # if base is censored, delta must be full replacement in a
                                 # single patch operation
                                 hlen = struct.calcsize(">lll")
                                 oldlen = self.rawsize(baserev)
                                 newlen = len(delta) - hlen
                                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                                     raise error.CensoredBaseError(self.indexfile,
                                                                   self.node(baserev))
                             if not flags and self._peek_iscensored(baserev, delta, flush):
                                 flags |= REVIDX_ISCENSORED
                             # We assume consumers of addrevisioncb will want to retrieve
                             # the added revision, which will require a call to
                             # revision(). revision() will fast path if there is a cache
                             # hit. So, we tell _addrevision() to always cache in this case.
                             # We're only using addgroup() in the context of changegroup
                             # generation so the revision data can always be handled as raw
                             # by the flagprocessor.
                             self._addrevision(node, None, transaction, link,
                                               p1, p2, flags, (baserev, delta),
                                               ifh, dfh,
                                               alwayscache=bool(addrevisioncb))
                             if addrevisioncb:
                                 addrevisioncb(self, node)
                             if not dfh and not self._inline:
                                 # addrevision switched from inline to conventional
                                 # reopen the index
                                 ifh.close()
                                 dfh = self.opener(self.datafile, "a+")
                                 ifh = self.opener(self.indexfile, "a+",
                                                   checkambig=self._checkambig)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                     return nodes
                 def iscensored(self, rev):
                     """Check if a file revision is censored."""
                     return False
                 def _peek_iscensored(self, baserev, delta, flush):
                     """Quickly check if a delta produces a censored revision."""
                     return False
                 def getstrippoint(self, minlink):
                     """find the minimum rev that must be stripped to strip the linkrev
                     Returns a tuple containing the minimum rev and a set of all revs that
                     have linkrevs that will be broken by this strip.
                     """
                     brokenrevs = set()
                     strippoint = len(self)
                     heads = {}
                     futurelargelinkrevs = set()
                     for head in self.headrevs():
                         headlinkrev = self.linkrev(head)
                         heads[head] = headlinkrev
                         if headlinkrev >= minlink:
                             futurelargelinkrevs.add(headlinkrev)
                     # This algorithm involves walking down the rev graph, starting at the
                     # heads. Since the revs are topologically sorted according to linkrev,
                     # once all head linkrevs are below the minlink, we know there are
                     # no more revs that could have a linkrev greater than minlink.
                     # So we can stop walking.
                     while futurelargelinkrevs:
                         strippoint -= 1
                         linkrev = heads.pop(strippoint)
                         if linkrev < minlink:
                             brokenrevs.add(strippoint)
                         else:
                             futurelargelinkrevs.remove(linkrev)
                         for p in self.parentrevs(strippoint):
                             if p != nullrev:
                                 plinkrev = self.linkrev(p)
                                 heads[p] = plinkrev
                                 if plinkrev >= minlink:
                                     futurelargelinkrevs.add(plinkrev)
                     return strippoint, brokenrevs
                 def strip(self, minlink, transaction):
                     """truncate the revlog on the first revision with a linkrev >= minlink
                     This function is called when we're stripping revision minlink and
                     its descendants from the repository.
                     We have to remove all revisions with linkrev >= minlink, because
                     the equivalent changelog revisions will be renumbered after the
                     strip.
                     So we truncate the revlog on the first of these revisions, and
                     trust that the caller has saved the revisions that shouldn't be
                     removed and that it'll re-add them after this truncation.
                     """
                     if len(self) == 0:
                         return
                     rev, _ = self.getstrippoint(minlink)
                     if rev == len(self):
                         return
                     # first truncate the files on disk
                     end = self.start(rev)
                     if not self._inline:
                         transaction.add(self.datafile, end)
                         end = rev * self._io.size
                     else:
                         end += rev * self._io.size
                     transaction.add(self.indexfile, end)
                     # then reset internal state in memory to forget those revisions
                     self._cache = None
                     self._chaininfocache = {}
                     self._chunkclear()
                     for x in xrange(rev, len(self)):
                         del self.nodemap[self.node(x)]
                     del self.index[rev:-1]
                 def checksize(self):
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         s = self._io.size
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             databytes = 0
                             for r in self:
                                 databytes += max(0, self.length(r))
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)
                 def files(self):
                     res = [self.indexfile]
                     if not self._inline:
                         res.append(self.datafile)
                     return res
                 DELTAREUSEALWAYS = 'always'
                 DELTAREUSESAMEREVS = 'samerevs'
                 DELTAREUSENEVER = 'never'
                 DELTAREUSEALL = {'always', 'samerevs', 'never'}
                 def clone(self, tr, destrevlog, addrevisioncb=None,
                           deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``aggressivemergedeltas`` argument
                     controls whether to compute deltas against both parents for merges.
                     By default, the current default is used.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
                     if len(destrevlog):
                         raise ValueError(_('destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_('source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_('destination revlog has filtered revisions'))
                     # lazydeltabase controls whether to reuse a cached delta, if possible.
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._aggressivemergedeltas
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                         destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
                         populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                             self.DELTAREUSESAMEREVS)
                         index = self.index
                         for rev in self:
                             entry = index[rev]
                             # Some classes override linkrev to take filtered revs into
                             # account. Use raw entry from index.
                             flags = entry[0] & 0xffff
                             linkrev = entry[4]
                             p1 = index[entry[5]][7]
                             p2 = index[entry[6]][7]
                             node = entry[7]
                             # (Possibly) reuse the delta from the revlog if allowed and
                             # the revlog chunk is a delta.
                             cachedelta = None
                             rawtext = None
                             if populatecachedelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, str(self._chunk(rev)))
                             if not cachedelta:
                                 rawtext = self.revision(rev, raw=True)
                             ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                                     checkambig=False)
                             dfh = None
                             if not destrevlog._inline:
                                 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                             try:
                                 destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                                         flags, cachedelta, ifh, dfh)
                             finally:
                                 if dfh:
                                     dfh.close()
                                 ifh.close()
                             if addrevisioncb:
                                 addrevisioncb(self, rev, node)
                     finally:
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._aggressivemergedeltas = oldamd