raise RevlogError when parser can't parse the revlog index...
Benoit Boissinot
r8016:baaa832f default
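Before this change, handing the index parser a truncated or garbled index file surfaced as a bare ValueError or IndexError escaping from revlog.__init__; the hunk below wraps the parseindex() call so the failure is reported as a RevlogError naming the index file. A minimal sketch of the error-translation pattern (parse_index and load_index here are hypothetical stand-ins; the real code wraps revlogio.parseindex and the C parsers.parse_index):

    class RevlogError(Exception):
        pass

    def parse_index(data):
        # stand-in for the C parser: a v1 index is a sequence of
        # 64-byte records, so a trailing partial record is corruption
        if len(data) % 64 != 0:
            raise ValueError("truncated index record")
        return [data[o:o + 64] for o in xrange(0, len(data), 64)]

    def load_index(indexfile, data):
        try:
            return parse_index(data)
        except (ValueError, IndexError):
            # translate low-level parser errors into a revlog-level
            # error, mirroring the hunk in this changeset
            raise RevlogError("index %s is corrupted" % indexfile)

Callers that already catch RevlogError now cover index corruption as well, instead of seeing an unrelated ValueError or IndexError.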
@@ -1,1360 +1,1363 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import bin, hex, nullid, nullrev, short
13 from node import bin, hex, nullid, nullrev, short
14 from i18n import _
14 from i18n import _
15 import changegroup, errno, ancestor, mdiff, parsers
15 import changegroup, errno, ancestor, mdiff, parsers
16 import struct, util, zlib, error
16 import struct, util, zlib, error
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = util.sha1
22 _sha = util.sha1
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 RevlogError = error.RevlogError
32 RevlogError = error.RevlogError
33 LookupError = error.LookupError
33 LookupError = error.LookupError
34
34
35 def getoffset(q):
35 def getoffset(q):
36 return int(q >> 16)
36 return int(q >> 16)
37
37
38 def gettype(q):
38 def gettype(q):
39 return int(q & 0xFFFF)
39 return int(q & 0xFFFF)
40
40
41 def offset_type(offset, type):
41 def offset_type(offset, type):
42 return long(long(offset) << 16 | type)
42 return long(long(offset) << 16 | type)
43
43
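# Illustrative example (not in the original file): an index entry's first
# field packs a 48-bit data-file offset with 16 bits of flags in the low word:
#   offset_type(10, 0) -> 655360L   (10 << 16, flags 0)
#   getoffset(655360)  -> 10
#   gettype(655360)    -> 0
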
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

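# Illustrative property (not in the original file): because the parents are
# sorted before hashing, the node id is order-independent in p1/p2:
#   hash(text, p1, p2) == hash(text, p2, p1) == sha1(min + max + text)
# so identical text reached through different history still gets a distinct
# node whenever the parent set differs.
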
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

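# Illustrative examples (not in the original file) of the tag bytes the two
# functions above agree on:
#   compress("")         -> ("", "")          empty text passes through
#   compress("abc")      -> ('u', "abc")      short text stored literal, 'u' tag
#   compress("\0abc")    -> ("", "\0abc")     NUL-led text needs no tag
#   compress("x" * 4096) -> ("", "x\x9c...")  zlib stream, starts with 'x'
# and decompress("".join(compress(t))) == t for any string t.
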
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

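# Illustrative note (not in the original file): each revlogng entry is
# struct.calcsize(">Qiiiiii20s12x") == 64 bytes and unpacks to the 8-tuple
# used throughout this file:
#   (offset_flags, comp_len, uncomp_len, base_rev,
#    link_rev, p1_rev, p2_rev, nodeid)
# the 12x padding grows the 20-byte SHA-1 to the "32 bytes: nodeid" slot
# described in the layout comment above.
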
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if util.openhardlinks() and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        data = fp.read()
        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
-            d = self._io.parseindex(f, self._inline)
+            try:
+                d = self._io.parseindex(f, self._inline)
+            except (ValueError, IndexError), e:
+                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
455
458
456 # add the magic null revision at -1 (if it hasn't been done already)
459 # add the magic null revision at -1 (if it hasn't been done already)
457 if (self.index == [] or isinstance(self.index, lazyindex) or
460 if (self.index == [] or isinstance(self.index, lazyindex) or
458 self.index[-1][7] != nullid) :
461 self.index[-1][7] != nullid) :
459 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
462 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
460
463
461 def _loadindex(self, start, end):
464 def _loadindex(self, start, end):
462 """load a block of indexes all at once from the lazy parser"""
465 """load a block of indexes all at once from the lazy parser"""
463 if isinstance(self.index, lazyindex):
466 if isinstance(self.index, lazyindex):
464 self.index.p.loadindex(start, end)
467 self.index.p.loadindex(start, end)
465
468
466 def _loadindexmap(self):
469 def _loadindexmap(self):
467 """loads both the map and the index from the lazy parser"""
470 """loads both the map and the index from the lazy parser"""
468 if isinstance(self.index, lazyindex):
471 if isinstance(self.index, lazyindex):
469 p = self.index.p
472 p = self.index.p
470 p.loadindex()
473 p.loadindex()
471 self.nodemap = p.map
474 self.nodemap = p.map
472
475
473 def _loadmap(self):
476 def _loadmap(self):
474 """loads the map from the lazy parser"""
477 """loads the map from the lazy parser"""
475 if isinstance(self.nodemap, lazymap):
478 if isinstance(self.nodemap, lazymap):
476 self.nodemap.p.loadmap()
479 self.nodemap.p.loadmap()
477 self.nodemap = self.nodemap.p.map
480 self.nodemap = self.nodemap.p.map
478
481
479 def tip(self):
482 def tip(self):
480 return self.node(len(self.index) - 2)
483 return self.node(len(self.index) - 2)
481 def __len__(self):
484 def __len__(self):
482 return len(self.index) - 1
485 return len(self.index) - 1
483 def __iter__(self):
486 def __iter__(self):
484 for i in xrange(len(self)):
487 for i in xrange(len(self)):
485 yield i
488 yield i
486 def rev(self, node):
489 def rev(self, node):
487 try:
490 try:
488 return self.nodemap[node]
491 return self.nodemap[node]
489 except KeyError:
492 except KeyError:
490 raise LookupError(node, self.indexfile, _('no node'))
493 raise LookupError(node, self.indexfile, _('no node'))
491 def node(self, rev):
494 def node(self, rev):
492 return self.index[rev][7]
495 return self.index[rev][7]
493 def linkrev(self, rev):
496 def linkrev(self, rev):
494 return self.index[rev][4]
497 return self.index[rev][4]
495 def parents(self, node):
498 def parents(self, node):
496 i = self.index
499 i = self.index
497 d = i[self.rev(node)]
500 d = i[self.rev(node)]
498 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
501 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
499 def parentrevs(self, rev):
502 def parentrevs(self, rev):
500 return self.index[rev][5:7]
503 return self.index[rev][5:7]
501 def start(self, rev):
504 def start(self, rev):
502 return int(self.index[rev][0] >> 16)
505 return int(self.index[rev][0] >> 16)
503 def end(self, rev):
506 def end(self, rev):
504 return self.start(rev) + self.length(rev)
507 return self.start(rev) + self.length(rev)
505 def length(self, rev):
508 def length(self, rev):
506 return self.index[rev][1]
509 return self.index[rev][1]
507 def base(self, rev):
510 def base(self, rev):
508 return self.index[rev][3]
511 return self.index[rev][3]
509
512
510 def size(self, rev):
513 def size(self, rev):
511 """return the length of the uncompressed text for a given revision"""
514 """return the length of the uncompressed text for a given revision"""
512 l = self.index[rev][2]
515 l = self.index[rev][2]
513 if l >= 0:
516 if l >= 0:
514 return l
517 return l
515
518
516 t = self.revision(self.node(rev))
519 t = self.revision(self.node(rev))
517 return len(t)
520 return len(t)
518
521
519 # alternate implementation, The advantage to this code is it
522 # alternate implementation, The advantage to this code is it
520 # will be faster for a single revision. But, the results are not
523 # will be faster for a single revision. But, the results are not
521 # cached, so finding the size of every revision will be slower.
524 # cached, so finding the size of every revision will be slower.
522 """
525 """
523 if self.cache and self.cache[1] == rev:
526 if self.cache and self.cache[1] == rev:
524 return len(self.cache[2])
527 return len(self.cache[2])
525
528
526 base = self.base(rev)
529 base = self.base(rev)
527 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
530 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
528 base = self.cache[1]
531 base = self.cache[1]
529 text = self.cache[2]
532 text = self.cache[2]
530 else:
533 else:
531 text = self.revision(self.node(base))
534 text = self.revision(self.node(base))
532
535
533 l = len(text)
536 l = len(text)
534 for x in xrange(base + 1, rev + 1):
537 for x in xrange(base + 1, rev + 1):
535 l = mdiff.patchedsize(l, self.chunk(x))
538 l = mdiff.patchedsize(l, self.chunk(x))
536 return l
539 return l
537 """
540 """
538
541
539 def reachable(self, node, stop=None):
542 def reachable(self, node, stop=None):
540 """return a hash of all nodes ancestral to a given node, including
543 """return a hash of all nodes ancestral to a given node, including
541 the node itself, stopping when stop is matched"""
544 the node itself, stopping when stop is matched"""
542 reachable = {}
545 reachable = {}
543 visit = [node]
546 visit = [node]
544 reachable[node] = 1
547 reachable[node] = 1
545 if stop:
548 if stop:
546 stopn = self.rev(stop)
549 stopn = self.rev(stop)
547 else:
550 else:
548 stopn = 0
551 stopn = 0
549 while visit:
552 while visit:
550 n = visit.pop(0)
553 n = visit.pop(0)
551 if n == stop:
554 if n == stop:
552 continue
555 continue
553 if n == nullid:
556 if n == nullid:
554 continue
557 continue
555 for p in self.parents(n):
558 for p in self.parents(n):
556 if self.rev(p) < stopn:
559 if self.rev(p) < stopn:
557 continue
560 continue
558 if p not in reachable:
561 if p not in reachable:
559 reachable[p] = 1
562 reachable[p] = 1
560 visit.append(p)
563 visit.append(p)
561 return reachable
564 return reachable
562
565
563 def ancestors(self, *revs):
566 def ancestors(self, *revs):
564 'Generate the ancestors of revs using a breadth-first visit'
567 'Generate the ancestors of revs using a breadth-first visit'
565 visit = list(revs)
568 visit = list(revs)
566 seen = util.set([nullrev])
569 seen = util.set([nullrev])
567 while visit:
570 while visit:
568 for parent in self.parentrevs(visit.pop(0)):
571 for parent in self.parentrevs(visit.pop(0)):
569 if parent not in seen:
572 if parent not in seen:
570 visit.append(parent)
573 visit.append(parent)
571 seen.add(parent)
574 seen.add(parent)
572 yield parent
575 yield parent
573
576
574 def descendants(self, *revs):
577 def descendants(self, *revs):
575 'Generate the descendants of revs in topological order'
578 'Generate the descendants of revs in topological order'
576 seen = util.set(revs)
579 seen = util.set(revs)
577 for i in xrange(min(revs) + 1, len(self)):
580 for i in xrange(min(revs) + 1, len(self)):
578 for x in self.parentrevs(i):
581 for x in self.parentrevs(i):
579 if x != nullrev and x in seen:
582 if x != nullrev and x in seen:
580 seen.add(i)
583 seen.add(i)
581 yield i
584 yield i
582 break
585 break
583
586
584 def findmissing(self, common=None, heads=None):
587 def findmissing(self, common=None, heads=None):
585 '''
588 '''
586 returns the topologically sorted list of nodes from the set:
589 returns the topologically sorted list of nodes from the set:
587 missing = (ancestors(heads) \ ancestors(common))
590 missing = (ancestors(heads) \ ancestors(common))
588
591
589 where ancestors() is the set of ancestors from heads, heads included
592 where ancestors() is the set of ancestors from heads, heads included
590
593
591 if heads is None, the heads of the revlog are used
594 if heads is None, the heads of the revlog are used
592 if common is None, nullid is assumed to be a common node
595 if common is None, nullid is assumed to be a common node
593 '''
596 '''
594 if common is None:
597 if common is None:
595 common = [nullid]
598 common = [nullid]
596 if heads is None:
599 if heads is None:
597 heads = self.heads()
600 heads = self.heads()
598
601
599 common = [self.rev(n) for n in common]
602 common = [self.rev(n) for n in common]
600 heads = [self.rev(n) for n in heads]
603 heads = [self.rev(n) for n in heads]
601
604
602 # we want the ancestors, but inclusive
605 # we want the ancestors, but inclusive
603 has = dict.fromkeys(self.ancestors(*common))
606 has = dict.fromkeys(self.ancestors(*common))
604 has[nullrev] = None
607 has[nullrev] = None
605 for r in common:
608 for r in common:
606 has[r] = None
609 has[r] = None
607
610
608 # take all ancestors from heads that aren't in has
611 # take all ancestors from heads that aren't in has
609 missing = {}
612 missing = {}
610 visit = [r for r in heads if r not in has]
613 visit = [r for r in heads if r not in has]
611 while visit:
614 while visit:
612 r = visit.pop(0)
615 r = visit.pop(0)
613 if r in missing:
616 if r in missing:
614 continue
617 continue
615 else:
618 else:
616 missing[r] = None
619 missing[r] = None
617 for p in self.parentrevs(r):
620 for p in self.parentrevs(r):
618 if p not in has:
621 if p not in has:
619 visit.append(p)
622 visit.append(p)
620 missing = missing.keys()
623 missing = missing.keys()
621 missing.sort()
624 missing.sort()
622 return [self.node(r) for r in missing]
625 return [self.node(r) for r in missing]
623
626
624 def nodesbetween(self, roots=None, heads=None):
627 def nodesbetween(self, roots=None, heads=None):
625 """Return a tuple containing three elements. Elements 1 and 2 contain
628 """Return a tuple containing three elements. Elements 1 and 2 contain
626 a final list bases and heads after all the unreachable ones have been
629 a final list bases and heads after all the unreachable ones have been
627 pruned. Element 0 contains a topologically sorted list of all
630 pruned. Element 0 contains a topologically sorted list of all
628
631
629 nodes that satisfy these constraints:
632 nodes that satisfy these constraints:
630 1. All nodes must be descended from a node in roots (the nodes on
633 1. All nodes must be descended from a node in roots (the nodes on
631 roots are considered descended from themselves).
634 roots are considered descended from themselves).
632 2. All nodes must also be ancestors of a node in heads (the nodes in
635 2. All nodes must also be ancestors of a node in heads (the nodes in
633 heads are considered to be their own ancestors).
636 heads are considered to be their own ancestors).
634
637
635 If roots is unspecified, nullid is assumed as the only root.
638 If roots is unspecified, nullid is assumed as the only root.
636 If heads is unspecified, it is taken to be the output of the
639 If heads is unspecified, it is taken to be the output of the
637 heads method (i.e. a list of all nodes in the repository that
640 heads method (i.e. a list of all nodes in the repository that
638 have no children)."""
641 have no children)."""
639 nonodes = ([], [], [])
642 nonodes = ([], [], [])
640 if roots is not None:
643 if roots is not None:
641 roots = list(roots)
644 roots = list(roots)
642 if not roots:
645 if not roots:
643 return nonodes
646 return nonodes
644 lowestrev = min([self.rev(n) for n in roots])
647 lowestrev = min([self.rev(n) for n in roots])
645 else:
648 else:
646 roots = [nullid] # Everybody's a descendent of nullid
649 roots = [nullid] # Everybody's a descendent of nullid
647 lowestrev = nullrev
650 lowestrev = nullrev
648 if (lowestrev == nullrev) and (heads is None):
651 if (lowestrev == nullrev) and (heads is None):
649 # We want _all_ the nodes!
652 # We want _all_ the nodes!
650 return ([self.node(r) for r in self], [nullid], list(self.heads()))
653 return ([self.node(r) for r in self], [nullid], list(self.heads()))
651 if heads is None:
654 if heads is None:
652 # All nodes are ancestors, so the latest ancestor is the last
655 # All nodes are ancestors, so the latest ancestor is the last
653 # node.
656 # node.
654 highestrev = len(self) - 1
657 highestrev = len(self) - 1
655 # Set ancestors to None to signal that every node is an ancestor.
658 # Set ancestors to None to signal that every node is an ancestor.
656 ancestors = None
659 ancestors = None
657 # Set heads to an empty dictionary for later discovery of heads
660 # Set heads to an empty dictionary for later discovery of heads
658 heads = {}
661 heads = {}
659 else:
662 else:
660 heads = list(heads)
663 heads = list(heads)
661 if not heads:
664 if not heads:
662 return nonodes
665 return nonodes
663 ancestors = {}
666 ancestors = {}
664 # Turn heads into a dictionary so we can remove 'fake' heads.
667 # Turn heads into a dictionary so we can remove 'fake' heads.
665 # Also, later we will be using it to filter out the heads we can't
668 # Also, later we will be using it to filter out the heads we can't
666 # find from roots.
669 # find from roots.
667 heads = dict.fromkeys(heads, 0)
670 heads = dict.fromkeys(heads, 0)
668 # Start at the top and keep marking parents until we're done.
671 # Start at the top and keep marking parents until we're done.
669 nodestotag = heads.keys()
672 nodestotag = heads.keys()
670 # Remember where the top was so we can use it as a limit later.
673 # Remember where the top was so we can use it as a limit later.
671 highestrev = max([self.rev(n) for n in nodestotag])
674 highestrev = max([self.rev(n) for n in nodestotag])
672 while nodestotag:
675 while nodestotag:
673 # grab a node to tag
676 # grab a node to tag
674 n = nodestotag.pop()
677 n = nodestotag.pop()
675 # Never tag nullid
678 # Never tag nullid
676 if n == nullid:
679 if n == nullid:
677 continue
680 continue
678 # A node's revision number represents its place in a
681 # A node's revision number represents its place in a
679 # topologically sorted list of nodes.
682 # topologically sorted list of nodes.
680 r = self.rev(n)
683 r = self.rev(n)
681 if r >= lowestrev:
684 if r >= lowestrev:
682 if n not in ancestors:
685 if n not in ancestors:
683 # If we are possibly a descendent of one of the roots
686 # If we are possibly a descendent of one of the roots
684 # and we haven't already been marked as an ancestor
687 # and we haven't already been marked as an ancestor
685 ancestors[n] = 1 # Mark as ancestor
688 ancestors[n] = 1 # Mark as ancestor
686 # Add non-nullid parents to list of nodes to tag.
689 # Add non-nullid parents to list of nodes to tag.
687 nodestotag.extend([p for p in self.parents(n) if
690 nodestotag.extend([p for p in self.parents(n) if
688 p != nullid])
691 p != nullid])
689 elif n in heads: # We've seen it before, is it a fake head?
692 elif n in heads: # We've seen it before, is it a fake head?
690 # So it is, real heads should not be the ancestors of
693 # So it is, real heads should not be the ancestors of
691 # any other heads.
694 # any other heads.
692 heads.pop(n)
695 heads.pop(n)
693 if not ancestors:
696 if not ancestors:
694 return nonodes
697 return nonodes
695 # Now that we have our set of ancestors, we want to remove any
698 # Now that we have our set of ancestors, we want to remove any
696 # roots that are not ancestors.
699 # roots that are not ancestors.
697
700
698 # If one of the roots was nullid, everything is included anyway.
701 # If one of the roots was nullid, everything is included anyway.
699 if lowestrev > nullrev:
702 if lowestrev > nullrev:
700 # But, since we weren't, let's recompute the lowest rev to not
703 # But, since we weren't, let's recompute the lowest rev to not
701 # include roots that aren't ancestors.
704 # include roots that aren't ancestors.
702
705
703 # Filter out roots that aren't ancestors of heads
706 # Filter out roots that aren't ancestors of heads
704 roots = [n for n in roots if n in ancestors]
707 roots = [n for n in roots if n in ancestors]
705 # Recompute the lowest revision
708 # Recompute the lowest revision
706 if roots:
709 if roots:
707 lowestrev = min([self.rev(n) for n in roots])
710 lowestrev = min([self.rev(n) for n in roots])
708 else:
711 else:
709 # No more roots? Return empty list
712 # No more roots? Return empty list
710 return nonodes
713 return nonodes
711 else:
714 else:
712 # We are descending from nullid, and don't need to care about
715 # We are descending from nullid, and don't need to care about
713 # any other roots.
716 # any other roots.
714 lowestrev = nullrev
717 lowestrev = nullrev
715 roots = [nullid]
718 roots = [nullid]
716 # Transform our roots list into a 'set' (i.e. a dictionary where the
719 # Transform our roots list into a 'set' (i.e. a dictionary where the
717 # values don't matter.
720 # values don't matter.
718 descendents = dict.fromkeys(roots, 1)
721 descendents = dict.fromkeys(roots, 1)
719 # Also, keep the original roots so we can filter out roots that aren't
722 # Also, keep the original roots so we can filter out roots that aren't
720 # 'real' roots (i.e. are descended from other roots).
723 # 'real' roots (i.e. are descended from other roots).
721 roots = descendents.copy()
724 roots = descendents.copy()
722 # Our topologically sorted list of output nodes.
725 # Our topologically sorted list of output nodes.
723 orderedout = []
726 orderedout = []
724 # Don't start at nullid since we don't want nullid in our output list,
727 # Don't start at nullid since we don't want nullid in our output list,
725 # and if nullid shows up in descedents, empty parents will look like
728 # and if nullid shows up in descedents, empty parents will look like
726 # they're descendents.
729 # they're descendents.
727 for r in xrange(max(lowestrev, 0), highestrev + 1):
730 for r in xrange(max(lowestrev, 0), highestrev + 1):
728 n = self.node(r)
731 n = self.node(r)
729 isdescendent = False
732 isdescendent = False
730 if lowestrev == nullrev: # Everybody is a descendent of nullid
733 if lowestrev == nullrev: # Everybody is a descendent of nullid
731 isdescendent = True
734 isdescendent = True
732 elif n in descendents:
735 elif n in descendents:
733 # n is already a descendent
736 # n is already a descendent
734 isdescendent = True
737 isdescendent = True
735 # This check only needs to be done here because all the roots
738 # This check only needs to be done here because all the roots
736 # will start being marked is descendents before the loop.
739 # will start being marked is descendents before the loop.
737 if n in roots:
740 if n in roots:
738 # If n was a root, check if it's a 'real' root.
741 # If n was a root, check if it's a 'real' root.
739 p = tuple(self.parents(n))
742 p = tuple(self.parents(n))
740 # If any of its parents are descendents, it's not a root.
743 # If any of its parents are descendents, it's not a root.
741 if (p[0] in descendents) or (p[1] in descendents):
744 if (p[0] in descendents) or (p[1] in descendents):
742 roots.pop(n)
745 roots.pop(n)
743 else:
746 else:
744 p = tuple(self.parents(n))
747 p = tuple(self.parents(n))
745 # A node is a descendent if either of its parents are
748 # A node is a descendent if either of its parents are
746 # descendents. (We seeded the dependents list with the roots
749 # descendents. (We seeded the dependents list with the roots
747 # up there, remember?)
750 # up there, remember?)
748 if (p[0] in descendents) or (p[1] in descendents):
751 if (p[0] in descendents) or (p[1] in descendents):
749 descendents[n] = 1
752 descendents[n] = 1
750 isdescendent = True
753 isdescendent = True
751 if isdescendent and ((ancestors is None) or (n in ancestors)):
754 if isdescendent and ((ancestors is None) or (n in ancestors)):
752 # Only include nodes that are both descendents and ancestors.
755 # Only include nodes that are both descendents and ancestors.
753 orderedout.append(n)
756 orderedout.append(n)
754 if (ancestors is not None) and (n in heads):
757 if (ancestors is not None) and (n in heads):
755 # We're trying to figure out which heads are reachable
758 # We're trying to figure out which heads are reachable
756 # from roots.
759 # from roots.
757 # Mark this head as having been reached
760 # Mark this head as having been reached
758 heads[n] = 1
761 heads[n] = 1
759 elif ancestors is None:
762 elif ancestors is None:
760 # Otherwise, we're trying to discover the heads.
763 # Otherwise, we're trying to discover the heads.
761 # Assume this is a head because if it isn't, the next step
764 # Assume this is a head because if it isn't, the next step
762 # will eventually remove it.
765 # will eventually remove it.
763 heads[n] = 1
766 heads[n] = 1
764 # But, obviously its parents aren't.
767 # But, obviously its parents aren't.
765 for p in self.parents(n):
768 for p in self.parents(n):
766 heads.pop(p, None)
769 heads.pop(p, None)
767 heads = [n for n in heads.iterkeys() if heads[n] != 0]
770 heads = [n for n in heads.iterkeys() if heads[n] != 0]
768 roots = roots.keys()
771 roots = roots.keys()
769 assert orderedout
772 assert orderedout
770 assert roots
773 assert roots
771 assert heads
774 assert heads
772 return (orderedout, roots, heads)
775 return (orderedout, roots, heads)
773
776
774 def heads(self, start=None, stop=None):
777 def heads(self, start=None, stop=None):
775 """return the list of all nodes that have no children
778 """return the list of all nodes that have no children
776
779
777 if start is specified, only heads that are descendants of
780 if start is specified, only heads that are descendants of
778 start will be returned
781 start will be returned
779 if stop is specified, it will consider all the revs from stop
782 if stop is specified, it will consider all the revs from stop
780 as if they had no children
783 as if they had no children
781 """
784 """
782 if start is None and stop is None:
785 if start is None and stop is None:
783 count = len(self)
786 count = len(self)
784 if not count:
787 if not count:
785 return [nullid]
788 return [nullid]
786 ishead = [1] * (count + 1)
789 ishead = [1] * (count + 1)
787 index = self.index
790 index = self.index
788 for r in xrange(count):
791 for r in xrange(count):
789 e = index[r]
792 e = index[r]
790 ishead[e[5]] = ishead[e[6]] = 0
793 ishead[e[5]] = ishead[e[6]] = 0
791 return [self.node(r) for r in xrange(count) if ishead[r]]
794 return [self.node(r) for r in xrange(count) if ishead[r]]
792
795
793 if start is None:
796 if start is None:
794 start = nullid
797 start = nullid
795 if stop is None:
798 if stop is None:
796 stop = []
799 stop = []
797 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
800 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
798 startrev = self.rev(start)
801 startrev = self.rev(start)
799 reachable = {startrev: 1}
802 reachable = {startrev: 1}
800 heads = {startrev: 1}
803 heads = {startrev: 1}
801
804
802 parentrevs = self.parentrevs
805 parentrevs = self.parentrevs
803 for r in xrange(startrev + 1, len(self)):
806 for r in xrange(startrev + 1, len(self)):
804 for p in parentrevs(r):
807 for p in parentrevs(r):
805 if p in reachable:
808 if p in reachable:
806 if r not in stoprevs:
809 if r not in stoprevs:
807 reachable[r] = 1
810 reachable[r] = 1
808 heads[r] = 1
811 heads[r] = 1
809 if p in heads and p not in stoprevs:
812 if p in heads and p not in stoprevs:
810 del heads[p]
813 del heads[p]
811
814
812 return [self.node(r) for r in heads]
815 return [self.node(r) for r in heads]
813
816
814 def children(self, node):
817 def children(self, node):
815 """find the children of a given node"""
818 """find the children of a given node"""
816 c = []
819 c = []
817 p = self.rev(node)
820 p = self.rev(node)
818 for r in range(p + 1, len(self)):
821 for r in range(p + 1, len(self)):
819 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
822 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
820 if prevs:
823 if prevs:
821 for pr in prevs:
824 for pr in prevs:
822 if pr == p:
825 if pr == p:
823 c.append(self.node(r))
826 c.append(self.node(r))
824 elif p == nullrev:
827 elif p == nullrev:
825 c.append(self.node(r))
828 c.append(self.node(r))
826 return c
829 return c
827
830
828 def _match(self, id):
831 def _match(self, id):
829 if isinstance(id, (long, int)):
832 if isinstance(id, (long, int)):
830 # rev
833 # rev
831 return self.node(id)
834 return self.node(id)
832 if len(id) == 20:
835 if len(id) == 20:
833 # possibly a binary node
836 # possibly a binary node
834 # odds of a binary node being all hex in ASCII are 1 in 10**25
837 # odds of a binary node being all hex in ASCII are 1 in 10**25
835 try:
838 try:
836 node = id
839 node = id
837 r = self.rev(node) # quick search the index
840 r = self.rev(node) # quick search the index
838 return node
841 return node
839 except LookupError:
842 except LookupError:
840 pass # may be partial hex id
843 pass # may be partial hex id
841 try:
844 try:
842 # str(rev)
845 # str(rev)
843 rev = int(id)
846 rev = int(id)
844 if str(rev) != id:
847 if str(rev) != id:
845 raise ValueError
848 raise ValueError
846 if rev < 0:
849 if rev < 0:
847 rev = len(self) + rev
850 rev = len(self) + rev
848 if rev < 0 or rev >= len(self):
851 if rev < 0 or rev >= len(self):
849 raise ValueError
852 raise ValueError
850 return self.node(rev)
853 return self.node(rev)
851 except (ValueError, OverflowError):
854 except (ValueError, OverflowError):
852 pass
855 pass
853 if len(id) == 40:
856 if len(id) == 40:
854 try:
857 try:
855 # a full hex nodeid?
858 # a full hex nodeid?
856 node = bin(id)
859 node = bin(id)
857 r = self.rev(node)
860 r = self.rev(node)
858 return node
861 return node
859 except (TypeError, LookupError):
862 except (TypeError, LookupError):
860 pass
863 pass
861
864
862 def _partialmatch(self, id):
865 def _partialmatch(self, id):
863 if len(id) < 40:
866 if len(id) < 40:
864 try:
867 try:
865 # hex(node)[:...]
868 # hex(node)[:...]
866 l = len(id) / 2 # grab an even number of digits
869 l = len(id) / 2 # grab an even number of digits
867 bin_id = bin(id[:l*2])
870 bin_id = bin(id[:l*2])
868 nl = [n for n in self.nodemap if n[:l] == bin_id]
871 nl = [n for n in self.nodemap if n[:l] == bin_id]
869 nl = [n for n in nl if hex(n).startswith(id)]
872 nl = [n for n in nl if hex(n).startswith(id)]
870 if len(nl) > 0:
873 if len(nl) > 0:
871 if len(nl) == 1:
874 if len(nl) == 1:
872 return nl[0]
875 return nl[0]
873 raise LookupError(id, self.indexfile,
876 raise LookupError(id, self.indexfile,
874 _('ambiguous identifier'))
877 _('ambiguous identifier'))
875 return None
878 return None
876 except TypeError:
879 except TypeError:
877 pass
880 pass
878
881
879 def lookup(self, id):
882 def lookup(self, id):
880 """locate a node based on:
883 """locate a node based on:
881 - revision number or str(revision number)
884 - revision number or str(revision number)
882 - nodeid or subset of hex nodeid
885 - nodeid or subset of hex nodeid
883 """
886 """
884 n = self._match(id)
887 n = self._match(id)
885 if n is not None:
888 if n is not None:
886 return n
889 return n
887 n = self._partialmatch(id)
890 n = self._partialmatch(id)
888 if n:
891 if n:
889 return n
892 return n
890
893
891 raise LookupError(id, self.indexfile, _('no match found'))
894 raise LookupError(id, self.indexfile, _('no match found'))
892
895
893 def cmp(self, node, text):
896 def cmp(self, node, text):
894 """compare text with a given file revision"""
897 """compare text with a given file revision"""
895 p1, p2 = self.parents(node)
898 p1, p2 = self.parents(node)
896 return hash(text, p1, p2) != node
899 return hash(text, p1, p2) != node
897
900
898 def chunk(self, rev, df=None):
901 def chunk(self, rev, df=None):
899 def loadcache(df):
902 def loadcache(df):
900 if not df:
903 if not df:
901 if self._inline:
904 if self._inline:
902 df = self.opener(self.indexfile)
905 df = self.opener(self.indexfile)
903 else:
906 else:
904 df = self.opener(self.datafile)
907 df = self.opener(self.datafile)
905 df.seek(start)
908 df.seek(start)
906 self._chunkcache = (start, df.read(cache_length))
909 self._chunkcache = (start, df.read(cache_length))
907
910
908 start, length = self.start(rev), self.length(rev)
911 start, length = self.start(rev), self.length(rev)
909 if self._inline:
912 if self._inline:
910 start += (rev + 1) * self._io.size
913 start += (rev + 1) * self._io.size
911 end = start + length
914 end = start + length
912
915
913 offset = 0
916 offset = 0
914 if not self._chunkcache:
917 if not self._chunkcache:
915 cache_length = max(65536, length)
918 cache_length = max(65536, length)
916 loadcache(df)
919 loadcache(df)
917 else:
920 else:
918 cache_start = self._chunkcache[0]
921 cache_start = self._chunkcache[0]
919 cache_length = len(self._chunkcache[1])
922 cache_length = len(self._chunkcache[1])
920 cache_end = cache_start + cache_length
923 cache_end = cache_start + cache_length
921 if start >= cache_start and end <= cache_end:
924 if start >= cache_start and end <= cache_end:
922 # it is cached
925 # it is cached
923 offset = start - cache_start
926 offset = start - cache_start
924 else:
927 else:
925 cache_length = max(65536, length)
928 cache_length = max(65536, length)
926 loadcache(df)
929 loadcache(df)
927
930
928 # avoid copying large chunks
931 # avoid copying large chunks
929 c = self._chunkcache[1]
932 c = self._chunkcache[1]
930 if cache_length != length:
933 if cache_length != length:
931 c = c[offset:offset + length]
934 c = c[offset:offset + length]
932
935
933 return decompress(c)
936 return decompress(c)
934
937
935 def revdiff(self, rev1, rev2):
938 def revdiff(self, rev1, rev2):
936 """return or calculate a delta between two revisions"""
939 """return or calculate a delta between two revisions"""
937 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
940 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
938 return self.chunk(rev2)
941 return self.chunk(rev2)
939
942
940 return mdiff.textdiff(self.revision(self.node(rev1)),
943 return mdiff.textdiff(self.revision(self.node(rev1)),
941 self.revision(self.node(rev2)))
944 self.revision(self.node(rev2)))
942
945
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

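    # Illustrative aside, assuming a revlog instance rl holding node n:
    # reconstruction patches every stored delta from the base snapshot up
    # to the wanted revision, then verifies the result against the node
    # hash, so a successful read implies integrity:
    #
    #   text = rl.revision(n)         # base chunk + mdiff.patches(...)
    #   assert not rl.cmp(n, text)    # hash(text, p1, p2) == n
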
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

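    # Illustrative aside: an inline revlog stores its data chunks
    # interleaved with index entries in the .i file; once that file passes
    # 128KB (131072 bytes), checkinlinesize() copies the chunks out to a
    # separate .d file and rewrites the index with the REVLOGNGINLINEDATA
    # flag cleared, using an atomic temp file so a crash never leaves a
    # half-converted index.
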
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

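    # Illustrative aside: dist measures the compressed bytes from the base
    # snapshot's start through the new delta, so the check above stores a
    # full version whenever the chain would exceed twice the uncompressed
    # text, bounding reconstruction cost. E.g. for a 10KB text with 21KB
    # accumulated since the base start, 21KB > 20KB and the revision is
    # stored as a new snapshot (base = curr).
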
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. the first delta is
        parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

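    # Illustrative aside: each entry in the stream above is a length header
    # (changegroup.chunkheader), an 80-byte metadata block of four 20-byte
    # hashes (node, p1, p2, linked changeset), and the delta payload,
    # sliced into 256KB (2**18 byte) pieces when larger than 1MB; an empty
    # chunk (changegroup.closechunk) terminates the group.
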
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

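    # Illustrative aside: addgroup() is the consumer of the stream produced
    # by group() -- it unpacks the same 80-byte metadata block, appends the
    # compressed delta directly while it chains off the previous revision
    # and stays small, and otherwise patches the text and re-adds it via
    # _addrevision(), whose returned node must match the advertised one
    # (the "consistency error" guard above).
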
    def strip(self, minlink):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

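    # Illustrative aside: field 4 of an index entry is its linkrev (see the
    # tuples built in _addrevision and addgroup), so the loop above stops
    # at the first revision linked at or beyond minlink and truncates both
    # the data file and the index from that point.
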
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

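    # Illustrative aside: the returned pair (dd, di) gives how many bytes
    # the data file and the index file respectively hold beyond what the
    # index entries account for; callers such as hg verify can treat
    # nonzero values as signs of a truncated or corrupted revlog.
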
    def files(self):
        res = [ self.indexfile ]
        if not self._inline:
            res.append(self.datafile)
        return res

@@ -1,26 +1,45 @@
 #!/bin/sh
 
 echo % prepare repo
-hg init
+hg init a
+cd a
 echo "some text" > FOO.txt
 echo "another text" > bar.txt
 echo "more text" > QUICK.txt
 hg add
 hg ci -d '0 0' -mtest1
 
 echo
 echo % verify
 hg verify
 
 echo
 echo % introduce some bugs in repo
 cd .hg/store/data
 mv _f_o_o.txt.i X_f_o_o.txt.i
 mv bar.txt.i xbar.txt.i
 rm _q_u_i_c_k.txt.i
 
 echo
 echo % verify
 hg verify
 
+cd ..
+
+echo % test revlog corruption
+hg init b
+cd b
+
+touch a
+hg add a
+hg ci -m a
+
+echo 'corrupted' > b
+head -c 20 .hg/store/data/a.i > start
+cat start b > .hg/store/data/a.i
+
+echo
+echo % verify
+hg verify
+
 exit 0
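The new test corrupts data/a.i by keeping only its first 20 bytes and
appending garbage, so the revlog index can no longer be parsed. With this
changeset the index parser reports that as a RevlogError instead of
crashing, which hg verify surfaces as the "broken revlog" message in the
expected output below. A minimal sketch of the behavior under test
(hypothetical and simplified; run from inside the corrupted repository,
and the exact point where the error surfaces may differ):

    import mercurial.revlog as revlog
    import mercurial.util as util
    from mercurial import error

    opener = util.opener('.hg/store')
    try:
        r = revlog.revlog(opener, 'data/a.i')
        print r.revision(r.node(0))
    except error.RevlogError, inst:
        print 'broken revlog: %s' % inst
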
@@ -1,31 +1,44 @@
 % prepare repo
 adding FOO.txt
 adding QUICK.txt
 adding bar.txt
 
 % verify
 checking changesets
 checking manifests
 crosschecking files in changesets and manifests
 checking files
 3 files, 1 changesets, 3 total revisions
 
 % introduce some bugs in repo
 
 % verify
 checking changesets
 checking manifests
 crosschecking files in changesets and manifests
 checking files
 data/FOO.txt.i@0: missing revlog!
 0: empty or missing FOO.txt
 FOO.txt@0: f62022d3d590 in manifests not found
 data/QUICK.txt.i@0: missing revlog!
 0: empty or missing QUICK.txt
 QUICK.txt@0: 88b857db8eba in manifests not found
 data/bar.txt.i@0: missing revlog!
 0: empty or missing bar.txt
 bar.txt@0: 256559129457 in manifests not found
 3 files, 1 changesets, 0 total revisions
 9 integrity errors encountered!
 (first damaged changeset appears to be 0)
+% test revlog corruption
+
+% verify
+checking changesets
+checking manifests
+crosschecking files in changesets and manifests
+checking files
+a@0: broken revlog! (index data/a.i is corrupted)
+warning: orphan revlog 'data/a.i'
+1 files, 1 changesets, 0 total revisions
+1 warnings encountered!
+1 integrity errors encountered!
+(first damaged changeset appears to be 0)