upstream/mercurial-mirror Commit - r2079:ee96ca27

1

"""

1

"""

2

revlog.py - storage back-end for mercurial

2

revlog.py - storage back-end for mercurial

3

4

This provides efficient delta storage with O(1) retrieve and append

4

This provides efficient delta storage with O(1) retrieve and append

5

and O(changes) merge between branches

5

and O(changes) merge between branches

6

7

8

9

This software may be used and distributed according to the terms

9

This software may be used and distributed according to the terms

10

of the GNU General Public License, incorporated herein by reference.

10

of the GNU General Public License, incorporated herein by reference.

11

"""

11

"""

12

13

from node import *

13

from node import *

14

from i18n import gettext as _

14

from i18n import gettext as _

15

from demandload import demandload

15

from demandload import demandload

16

demandload(globals(), "binascii changegroup errno heapq mdiff os")

16

demandload(globals(), "binascii changegroup errno heapq mdiff os")

17

demandload(globals(), "sha struct zlib")

17

demandload(globals(), "sha struct zlib")

18

19

# revlog version strings

19

# revlog version strings

20

REVLOGV0 = 0

20

REVLOGV0 = 0

21

REVLOGNG = 1

21

REVLOGNG = 1

22

23

# revlog flags

23

# revlog flags

24

REVLOGNGINLINEDATA = (1 << 16)

24

REVLOGNGINLINEDATA = (1 << 16)

25

26

def flagstr(flag):

26

def flagstr(flag):

27

if flag == "inline":

27

if flag == "inline":

28

return REVLOGNGINLINEDATA

28

return REVLOGNGINLINEDATA

29

raise RevlogError(_("unknown revlog flag %s" % flag))

29

raise RevlogError(_("unknown revlog flag %s" % flag))

30

31

def hash(text, p1, p2):

31

def hash(text, p1, p2):

32

"""generate a hash from the given text and its parent hashes

32

"""generate a hash from the given text and its parent hashes

33

34

This hash combines both the current file contents and its history

34

This hash combines both the current file contents and its history

35

in a manner that makes it easy to distinguish nodes with the same

35

in a manner that makes it easy to distinguish nodes with the same

36

content in the revision graph.

36

content in the revision graph.

37

"""

37

"""

38

l = [p1, p2]

38

l = [p1, p2]

39

l.sort()

39

l.sort()

40

s = sha.new(l[0])

40

s = sha.new(l[0])

41

s.update(l[1])

41

s.update(l[1])

42

s.update(text)

42

s.update(text)

43

return s.digest()

43

return s.digest()

44

45

def compress(text):

45

def compress(text):

46

""" generate a possibly-compressed representation of text """

46

""" generate a possibly-compressed representation of text """

47

if not text: return ("", text)

47

if not text: return ("", text)

48

if len(text) < 44:

48

if len(text) < 44:

49

if text[0] == '\0': return ("", text)

49

if text[0] == '\0': return ("", text)

50

return ('u', text)

50

return ('u', text)

51

bin = zlib.compress(text)

51

bin = zlib.compress(text)

52

if len(bin) > len(text):

52

if len(bin) > len(text):

53

if text[0] == '\0': return ("", text)

53

if text[0] == '\0': return ("", text)

54

return ('u', text)

54

return ('u', text)

55

return ("", bin)

55

return ("", bin)

56

57

def decompress(bin):

57

def decompress(bin):

58

""" decompress the given input """

58

""" decompress the given input """

59

if not bin: return bin

59

if not bin: return bin

60

t = bin[0]

60

t = bin[0]

61

if t == '\0': return bin

61

if t == '\0': return bin

62

if t == 'x': return zlib.decompress(bin)

62

if t == 'x': return zlib.decompress(bin)

63

if t == 'u': return bin[1:]

63

if t == 'u': return bin[1:]

64

raise RevlogError(_("unknown compression type %r") % t)

64

raise RevlogError(_("unknown compression type %r") % t)

65

66

indexformatv0 = ">4l20s20s20s"

66

indexformatv0 = ">4l20s20s20s"

67

v0shaoffset = 56

67

# index ng:

68

# index ng:

68

# 6 bytes offset

69

# 6 bytes offset

69

# 2 bytes flags

70

# 2 bytes flags

70

# 4 bytes compressed length

71

# 4 bytes compressed length

71

# 4 bytes uncompressed length

72

# 4 bytes uncompressed length

72

# 4 bytes: base rev

73

# 4 bytes: base rev

73

# 4 bytes link rev

74

# 4 bytes link rev

74

# 4 bytes parent 1 rev

75

# 4 bytes parent 1 rev

75

# 4 bytes parent 2 rev

76

# 4 bytes parent 2 rev

76

# 32 bytes: nodeid

77

# 32 bytes: nodeid

77

indexformatng = ">Qiiiiii20s12x"

78

indexformatng = ">Qiiiiii20s12x"

79

ngshaoffset = 32

78

versionformat = ">i"

80

versionformat = ">i"

79

81

80

class lazyparser(object):

82

class lazyparser(object):

81

"""

83

"""

82

this class avoids the need to parse the entirety of large indices

84

this class avoids the need to parse the entirety of large indices

83

84

By default we parse and load 1000 entries at a time.

85

86

If no position is specified, we load the whole index, and replace

87

the lazy objects in revlog with the underlying objects for

88

efficiency in cases where we look at most of the nodes.

89

"""

85

"""

90

def __init__(self, data, ~~revlog~~, indexformat):

86

def __init__(self, dataf, size, indexformat, shaoffset):

91

self.data = data

87

self.dataf = dataf

88

self.format = indexformat

92

self.s = struct.calcsize(indexformat)

89

self.s = struct.calcsize(indexformat)

93

self.indexformat = indexformat

90

self.indexformat = indexformat

94

self.l = ~~len~~(~~data~~)/self.s

91

self.datasize = size

92

self.l = size/self.s

95

self.index = [None] * self.l

93

self.index = [None] * self.l

96

self.map = {nullid: -1}

94

self.map = {nullid: -1}

95

self.allmap = 0

97

self.all = 0

96

self.all = 0

98

self.revlog = revlog

97

self.mapfind_count = 0

98

self.shaoffset = shaoffset

99

100

def load(self, ~~pos~~=~~None~~):

100

def loadmap(self):

101

"""

102

during a commit, we need to make sure the rev being added is

103

not a duplicate. This requires loading the entire index,

104

which is fairly slow. loadmap can load up just the node map,

105

which takes much less time.

106

"""

107

if self.allmap: return

108

start = 0

109

end = self.datasize

110

self.allmap = 1

111

cur = 0

112

count = 0

113

blocksize = self.s * 256

114

self.dataf.seek(0)

115

while cur < end:

116

data = self.dataf.read(blocksize)

117

off = 0

118

for x in xrange(256):

119

n = data[off + self.shaoffset:off + self.shaoffset + 20]

120

self.map[n] = count

121

count += 1

122

if count >= self.l:

123

break

124

off += self.s

125

cur += blocksize

126

127

def loadblock(self, blockstart, blocksize, data=None):

101

if self.all: return

128

if self.all: return

102

if ~~pos~~ is ~~not~~ None:

129

if data is None:

103

block = pos / 1000

130

self.dataf.seek(blockstart)

104

i = block * 1000

131

data = self.dataf.read(blocksize)

105

end = min(self.l, i + 1000)

132

lend = len(data) / self.s

106

else:

133

i = blockstart / self.s

134

off = 0

135

for x in xrange(lend):

136

if self.index[i + x] == None:

137

b = data[off : off + self.s]

138

e = struct.unpack(self.format, b)

139

self.index[i + x] = e

140

self.map[e[-1]] = i + x

141

off += self.s

142

143

def findnode(self, node):

144

"""search backwards through the index file for a specific node"""

145

if self.allmap: return None

146

147

# hg log will cause many many searches for the manifest

148

# nodes. After we get called a few times, just load the whole

149

# thing.

150

if self.mapfind_count > 8:

151

self.loadmap()

152

if node in self.map:

153

return node

154

return None

155

self.mapfind_count += 1

156

last = self.l - 1

157

while self.index[last] != None:

158

if last == 0:

107

self.all = 1

159

self.all = 1

108

i = 0

160

self.allmap = 1

109

end = ~~self~~.l

161

return None

110

self.revlog.index = self.index

162

last -= 1

111

self.revlog.nodemap = self.map

163

end = (last + 1) * self.s

164

blocksize = self.s * 256

165

while end >= 0:

166

start = max(end - blocksize, 0)

167

self.dataf.seek(start)

168

data = self.dataf.read(end - start)

169

findend = end - start

170

while True:

171

# we're searching backwards, so weh have to make sure

172

# we don't find a changeset where this node is a parent

173

off = data.rfind(node, 0, findend)

174

findend = off

175

if off >= 0:

176

i = off / self.s

177

off = i * self.s

178

n = data[off + self.shaoffset:off + self.shaoffset + 20]

179

if n == node:

180

self.map[n] = i + start / self.s

181

return node

182

else:

183

break

184

end -= blocksize

185

return None

112

186

113

while i < end:

187

def loadindex(self, i=None, end=None):

114

if not self.index[i]:

188

if self.all: return

115

d = self.data[i * self.s: (i + 1) * self.s]

189

all = False

116

e = struct.unpack(self.indexformat, d)

190

if i == None:

117

self.index[i] = e

191

blockstart = 0

118

self.map[e[-1]] = i

192

blocksize = (512 / self.s) * self.s

119

i += 1

193

end = self.datasize

194

all = True

195

else:

196

if end:

197

blockstart = i * self.s

198

end = end * self.s

199

blocksize = end - blockstart

200

else:

201

blockstart = (i & ~(32)) * self.s

202

blocksize = self.s * 64

203

end = blockstart + blocksize

204

while blockstart < end:

205

self.loadblock(blockstart, blocksize)

206

blockstart += blocksize

207

if all: self.all = True

120

208

121

class lazyindex(object):

209

class lazyindex(object):

122

"""a lazy version of the index array"""

210

"""a lazy version of the index array"""

123

def __init__(self, parser):

211

def __init__(self, parser):

124

self.p = parser

212

self.p = parser

125

def __len__(self):

213

def __len__(self):

126

return len(self.p.index)

214

return len(self.p.index)

127

def load(self, pos):

215

def load(self, pos):

128

if pos < 0:

216

if pos < 0:

129

pos += len(self.p.index)

217

pos += len(self.p.index)

130

self.p.load(pos)

218

self.p.loadindex(pos)

131

return self.p.index[pos]

219

return self.p.index[pos]

132

def __getitem__(self, pos):

220

def __getitem__(self, pos):

133

return self.p.index[pos] or self.load(pos)

221

return self.p.index[pos] or self.load(pos)

134

def __setitem__(self, pos, item):

222

def __setitem__(self, pos, item):

135

self.p.index[pos] = item

223

self.p.index[pos] = item

136

def __delitem__(self, pos):

224

def __delitem__(self, pos):

137

del self.p.index[pos]

225

del self.p.index[pos]

138

def append(self, e):

226

def append(self, e):

139

self.p.index.append(e)

227

self.p.index.append(e)

140

228

141

class lazymap(object):

229

class lazymap(object):

142

"""a lazy version of the node map"""

230

"""a lazy version of the node map"""

143

def __init__(self, parser):

231

def __init__(self, parser):

144

self.p = parser

232

self.p = parser

145

def load(self, key):

233

def load(self, key):

146

if self.p.all: return

234

n = self.p.findnode(key)

147

n = self.p.data.find(key)

235

if n == None:

148

if n < 0:

149

raise KeyError(key)

236

raise KeyError(key)

150

pos = n / self.p.s

151

self.p.load(pos)

152

def __contains__(self, key):

237

def __contains__(self, key):

153

self.p.~~load~~()

238

if key in self.p.map:

239

return True

240

self.p.loadmap()

154

return key in self.p.map

241

return key in self.p.map

155

def __iter__(self):

242

def __iter__(self):

156

yield nullid

243

yield nullid

157

for i in xrange(self.p.l):

244

for i in xrange(self.p.l):

158

try:

245

try:

159

yield self.p.index[i][-1]

246

yield self.p.index[i][-1]

160

except:

247

except:

161

self.p.load(i)

248

self.p.loadindex(i)

162

yield self.p.index[i][-1]

249

yield self.p.index[i][-1]

163

def __getitem__(self, key):

250

def __getitem__(self, key):

164

try:

251

try:

165

return self.p.map[key]

252

return self.p.map[key]

166

except KeyError:

253

except KeyError:

167

try:

254

try:

168

self.load(key)

255

self.load(key)

169

return self.p.map[key]

256

return self.p.map[key]

170

except KeyError:

257

except KeyError:

171

raise KeyError("node " + hex(key))

258

raise KeyError("node " + hex(key))

172

def __setitem__(self, key, val):

259

def __setitem__(self, key, val):

173

self.p.map[key] = val

260

self.p.map[key] = val

174

def __delitem__(self, key):

261

def __delitem__(self, key):

175

del self.p.map[key]

262

del self.p.map[key]

176

263

177

class RevlogError(Exception): pass

264

class RevlogError(Exception): pass

178

265

179

class revlog(object):

266

class revlog(object):

180

"""

267

"""

181

the underlying revision storage object

268

the underlying revision storage object

182

269

183

A revlog consists of two parts, an index and the revision data.

270

A revlog consists of two parts, an index and the revision data.

184

271

185

The index is a file with a fixed record size containing

272

The index is a file with a fixed record size containing

186

information on each revision, includings its nodeid (hash), the

273

information on each revision, includings its nodeid (hash), the

187

nodeids of its parents, the position and offset of its data within

274

nodeids of its parents, the position and offset of its data within

188

the data file, and the revision it's based on. Finally, each entry

275

the data file, and the revision it's based on. Finally, each entry

189

contains a linkrev entry that can serve as a pointer to external

276

contains a linkrev entry that can serve as a pointer to external

190

data.

277

data.

191

278

192

The revision data itself is a linear collection of data chunks.

279

The revision data itself is a linear collection of data chunks.

193

Each chunk represents a revision and is usually represented as a

280

Each chunk represents a revision and is usually represented as a

194

delta against the previous chunk. To bound lookup time, runs of

281

delta against the previous chunk. To bound lookup time, runs of

195

deltas are limited to about 2 times the length of the original

282

deltas are limited to about 2 times the length of the original

196

version data. This makes retrieval of a version proportional to

283

version data. This makes retrieval of a version proportional to

197

its size, or O(1) relative to the number of revisions.

284

its size, or O(1) relative to the number of revisions.

198

285

199

Both pieces of the revlog are written to in an append-only

286

Both pieces of the revlog are written to in an append-only

200

fashion, which means we never need to rewrite a file to insert or

287

fashion, which means we never need to rewrite a file to insert or

201

remove data, and can use some simple techniques to avoid the need

288

remove data, and can use some simple techniques to avoid the need

202

for locking while reading.

289

for locking while reading.

203

"""

290

"""

204

def __init__(self, opener, indexfile, datafile, defversion=0):

291

def __init__(self, opener, indexfile, datafile, defversion=0):

205

"""

292

"""

206

create a revlog object

293

create a revlog object

207

294

208

opener is a function that abstracts the file opening operation

295

opener is a function that abstracts the file opening operation

209

and can be used to implement COW semantics or the like.

296

and can be used to implement COW semantics or the like.

210

"""

297

"""

211

self.indexfile = indexfile

298

self.indexfile = indexfile

212

self.datafile = datafile

299

self.datafile = datafile

213

self.opener = opener

300

self.opener = opener

214

301

215

self.indexstat = None

302

self.indexstat = None

216

self.cache = None

303

self.cache = None

217

self.chunkcache = None

304

self.chunkcache = None

218

self.defversion = defversion

305

self.defversion = defversion

219

self.load()

306

self.load()

220

307

221

def load(self):

308

def load(self):

222

v = self.defversion

309

v = self.defversion

223

try:

310

try:

224

f = self.opener(self.indexfile)

311

f = self.opener(self.indexfile)

225

i = f.read()

312

i = f.read(4)

313

f.seek(0)

226

except IOError, inst:

314

except IOError, inst:

227

if inst.errno != errno.ENOENT:

315

if inst.errno != errno.ENOENT:

228

raise

316

raise

229

i = ""

317

i = ""

230

else:

318

else:

231

try:

319

try:

232

st = os.fstat(f.fileno())

320

st = os.fstat(f.fileno())

233

except AttributeError, inst:

321

except AttributeError, inst:

234

st = None

322

st = None

235

else:

323

else:

236

oldst = self.indexstat

324

oldst = self.indexstat

237

if (oldst and st.st_dev == oldst.st_dev

325

if (oldst and st.st_dev == oldst.st_dev

238

and st.st_ino == oldst.st_ino

326

and st.st_ino == oldst.st_ino

239

and st.st_mtime == oldst.st_mtime

327

and st.st_mtime == oldst.st_mtime

240

and st.st_ctime == oldst.st_ctime):

328

and st.st_ctime == oldst.st_ctime):

241

return

329

return

242

self.indexstat = st

330

self.indexstat = st

243

if len(i) > 0:

331

if len(i) > 0:

244

v = struct.unpack(versionformat, i[:4])[0]

332

v = struct.unpack(versionformat, i)[0]

245

flags = v & ~0xFFFF

333

flags = v & ~0xFFFF

246

fmt = v & 0xFFFF

334

fmt = v & 0xFFFF

247

if fmt == 0:

335

if fmt == 0:

248

if flags:

336

if flags:

249

raise RevlogError(_("index %s invalid flags %x for format v0" %

337

raise RevlogError(_("index %s invalid flags %x for format v0" %

250

(self.indexfile, flags)))

338

(self.indexfile, flags)))

251

elif fmt == REVLOGNG:

339

elif fmt == REVLOGNG:

252

if flags & ~REVLOGNGINLINEDATA:

340

if flags & ~REVLOGNGINLINEDATA:

253

raise RevlogError(_("index %s invalid flags %x for revlogng" %

341

raise RevlogError(_("index %s invalid flags %x for revlogng" %

254

(self.indexfile, flags)))

342

(self.indexfile, flags)))

255

else:

343

else:

256

raise RevlogError(_("index %s invalid format %d" %

344

raise RevlogError(_("index %s invalid format %d" %

257

(self.indexfile, fmt)))

345

(self.indexfile, fmt)))

258

self.version = v

346

self.version = v

259

if v == 0:

347

if v == 0:

260

self.indexformat = indexformatv0

348

self.indexformat = indexformatv0

349

shaoffset = v0shaoffset

261

else:

350

else:

262

self.indexformat = indexformatng

351

self.indexformat = indexformatng

352

shaoffset = ngshaoffset

263

353

264

if i:

354

if i:

265

if not self.inlinedata() and st and st.st_size > 10000:

355

if not self.inlinedata() and st and st.st_size > 10000:

266

# big index, let's parse it on demand

356

# big index, let's parse it on demand

267

parser = lazyparser(i, self, self.indexformat)

357

parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)

268

self.index = lazyindex(parser)

358

self.index = lazyindex(parser)

269

self.nodemap = lazymap(parser)

359

self.nodemap = lazymap(parser)

270

else:

360

else:

361

i = f.read()

271

self.parseindex(i)

362

self.parseindex(i)

272

if self.inlinedata():

363

if self.inlinedata():

273

# we've already got the entire data file read in, save it

364

# we've already got the entire data file read in, save it

274

# in the chunk data

365

# in the chunk data

275

self.chunkcache = (0, i)

366

self.chunkcache = (0, i)

276

if self.version != 0:

367

if self.version != 0:

277

e = list(self.index[0])

368

e = list(self.index[0])

278

type = self.ngtype(e[0])

369

type = self.ngtype(e[0])

279

e[0] = self.offset_type(0, type)

370

e[0] = self.offset_type(0, type)

280

self.index[0] = e

371

self.index[0] = e

281

else:

372

else:

282

self.nodemap = { nullid: -1}

373

self.nodemap = { nullid: -1}

283

self.index = []

374

self.index = []

284

375

285

376

286

def parseindex(self, data):

377

def parseindex(self, data):

287

s = struct.calcsize(self.indexformat)

378

s = struct.calcsize(self.indexformat)

288

l = len(data)

379

l = len(data)

289

self.index = []

380

self.index = []

290

self.nodemap = {nullid: -1}

381

self.nodemap = {nullid: -1}

291

inline = self.inlinedata()

382

inline = self.inlinedata()

292

off = 0

383

off = 0

293

n = 0

384

n = 0

294

while off < l:

385

while off < l:

295

e = struct.unpack(self.indexformat, data[off:off + s])

386

e = struct.unpack(self.indexformat, data[off:off + s])

296

self.index.append(e)

387

self.index.append(e)

297

self.nodemap[e[-1]] = n

388

self.nodemap[e[-1]] = n

298

n += 1

389

n += 1

299

off += s

390

off += s

300

if inline:

391

if inline:

301

off += e[1]

392

off += e[1]

302

393

303

def ngoffset(self, q):

394

def ngoffset(self, q):

304

if q & 0xFFFF:

395

if q & 0xFFFF:

305

raise RevlogError(_('%s: incompatible revision flag %x') %

396

raise RevlogError(_('%s: incompatible revision flag %x') %

306

(self.indexfile, type))

397

(self.indexfile, type))

307

return long(q >> 16)

398

return long(q >> 16)

308

399

309

def ngtype(self, q):

400

def ngtype(self, q):

310

return int(q & 0xFFFF)

401

return int(q & 0xFFFF)

311

402

312

def offset_type(self, offset, type):

403

def offset_type(self, offset, type):

313

return long(long(offset) << 16 | type)

404

return long(long(offset) << 16 | type)

314

405

406

def loadindex(self, start, end):

407

"""load a block of indexes all at once from the lazy parser"""

408

if isinstance(self.index, lazyindex):

409

self.index.p.loadindex(start, end)

410

315

def loadindexmap(self):

411

def loadindexmap(self):

316

"""loads both the map and the index from the lazy parser"""

412

"""loads both the map and the index from the lazy parser"""

317

if isinstance(self.index, lazyindex):

413

if isinstance(self.index, lazyindex):

318

p = self.index.p

414

p = self.index.p

319

p.load()

415

p.loadindex()

416

self.nodemap = p.map

417

418

def loadmap(self):

419

"""loads the map from the lazy parser"""

420

if isinstance(self.nodemap, lazymap):

421

self.nodemap.p.loadmap()

422

self.nodemap = self.nodemap.p.map

320

423

321

def inlinedata(self): return self.version & REVLOGNGINLINEDATA

424

def inlinedata(self): return self.version & REVLOGNGINLINEDATA

322

def tip(self): return self.node(len(self.index) - 1)

425

def tip(self): return self.node(len(self.index) - 1)

323

def count(self): return len(self.index)

426

def count(self): return len(self.index)

324

def node(self, rev):

427

def node(self, rev):

325

return (rev < 0) and nullid or self.index[rev][-1]

428

return (rev < 0) and nullid or self.index[rev][-1]

326

def rev(self, node):

429

def rev(self, node):

327

try:

430

try:

328

return self.nodemap[node]

431

return self.nodemap[node]

329

except KeyError:

432

except KeyError:

330

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

433

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

331

def linkrev(self, node): return self.index[self.rev(node)][-4]

434

def linkrev(self, node): return self.index[self.rev(node)][-4]

332

def parents(self, node):

435

def parents(self, node):

333

if node == nullid: return (nullid, nullid)

436

if node == nullid: return (nullid, nullid)

334

r = self.rev(node)

437

r = self.rev(node)

335

d = self.index[r][-3:-1]

438

d = self.index[r][-3:-1]

336

if self.version == 0:

439

if self.version == 0:

337

return d

440

return d

338

return [ self.node(x) for x in d ]

441

return [ self.node(x) for x in d ]

339

def start(self, rev):

442

def start(self, rev):

340

if rev < 0:

443

if rev < 0:

341

return -1

444

return -1

342

if self.version != 0:

445

if self.version != 0:

343

return self.ngoffset(self.index[rev][0])

446

return self.ngoffset(self.index[rev][0])

344

return self.index[rev][0]

447

return self.index[rev][0]

345

448

346

def end(self, rev): return self.start(rev) + self.length(rev)

449

def end(self, rev): return self.start(rev) + self.length(rev)

347

450

348

def size(self, rev):

451

def size(self, rev):

349

"""return the length of the uncompressed text for a given revision"""

452

"""return the length of the uncompressed text for a given revision"""

350

l = -1

453

l = -1

351

if self.version != 0:

454

if self.version != 0:

352

l = self.index[rev][2]

455

l = self.index[rev][2]

353

if l >= 0:

456

if l >= 0:

354

return l

457

return l

355

458

356

t = self.revision(self.node(rev))

459

t = self.revision(self.node(rev))

357

return len(t)

460

return len(t)

358

461

359

# alternate implementation, The advantage to this code is it

462

# alternate implementation, The advantage to this code is it

360

# will be faster for a single revision. But, the results are not

463

# will be faster for a single revision. But, the results are not

361

# cached, so finding the size of every revision will be slower.

464

# cached, so finding the size of every revision will be slower.

362

"""

465

"""

363

if self.cache and self.cache[1] == rev:

466

if self.cache and self.cache[1] == rev:

364

return len(self.cache[2])

467

return len(self.cache[2])

365

468

366

base = self.base(rev)

469

base = self.base(rev)

367

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

470

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

368

base = self.cache[1]

471

base = self.cache[1]

369

text = self.cache[2]

472

text = self.cache[2]

370

else:

473

else:

371

text = self.revision(self.node(base))

474

text = self.revision(self.node(base))

372

475

373

l = len(text)

476

l = len(text)

374

for x in xrange(base + 1, rev + 1):

477

for x in xrange(base + 1, rev + 1):

375

l = mdiff.patchedsize(l, self.chunk(x))

478

l = mdiff.patchedsize(l, self.chunk(x))

376

return l

479

return l

377

"""

480

"""

378

481

379

def length(self, rev):

482

def length(self, rev):

380

if rev < 0:

483

if rev < 0:

381

return 0

484

return 0

382

else:

485

else:

383

return self.index[rev][1]

486

return self.index[rev][1]

384

def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

487

def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

385

488

386

def reachable(self, rev, stop=None):

489

def reachable(self, rev, stop=None):

387

reachable = {}

490

reachable = {}

388

visit = [rev]

491

visit = [rev]

389

reachable[rev] = 1

492

reachable[rev] = 1

390

if stop:

493

if stop:

391

stopn = self.rev(stop)

494

stopn = self.rev(stop)

392

else:

495

else:

393

stopn = 0

496

stopn = 0

394

while visit:

497

while visit:

395

n = visit.pop(0)

498

n = visit.pop(0)

396

if n == stop:

499

if n == stop:

397

continue

500

continue

398

if n == nullid:

501

if n == nullid:

399

continue

502

continue

400

for p in self.parents(n):

503

for p in self.parents(n):

401

if self.rev(p) < stopn:

504

if self.rev(p) < stopn:

402

continue

505

continue

403

if p not in reachable:

506

if p not in reachable:

404

reachable[p] = 1

507

reachable[p] = 1

405

visit.append(p)

508

visit.append(p)

406

return reachable

509

return reachable

407

510

408

def nodesbetween(self, roots=None, heads=None):

511

def nodesbetween(self, roots=None, heads=None):

409

"""Return a tuple containing three elements. Elements 1 and 2 contain

512

"""Return a tuple containing three elements. Elements 1 and 2 contain

410

a final list bases and heads after all the unreachable ones have been

513

a final list bases and heads after all the unreachable ones have been

411

pruned. Element 0 contains a topologically sorted list of all

514

pruned. Element 0 contains a topologically sorted list of all

412

515

413

nodes that satisfy these constraints:

516

nodes that satisfy these constraints:

414

1. All nodes must be descended from a node in roots (the nodes on

517

1. All nodes must be descended from a node in roots (the nodes on

415

roots are considered descended from themselves).

518

roots are considered descended from themselves).

416

2. All nodes must also be ancestors of a node in heads (the nodes in

519

2. All nodes must also be ancestors of a node in heads (the nodes in

417

heads are considered to be their own ancestors).

520

heads are considered to be their own ancestors).

418

521

419

If roots is unspecified, nullid is assumed as the only root.

522

If roots is unspecified, nullid is assumed as the only root.

420

If heads is unspecified, it is taken to be the output of the

523

If heads is unspecified, it is taken to be the output of the

421

heads method (i.e. a list of all nodes in the repository that

524

heads method (i.e. a list of all nodes in the repository that

422

have no children)."""

525

have no children)."""

423

nonodes = ([], [], [])

526

nonodes = ([], [], [])

424

if roots is not None:

527

if roots is not None:

425

roots = list(roots)

528

roots = list(roots)

426

if not roots:

529

if not roots:

427

return nonodes

530

return nonodes

428

lowestrev = min([self.rev(n) for n in roots])

531

lowestrev = min([self.rev(n) for n in roots])

429

else:

532

else:

430

roots = [nullid] # Everybody's a descendent of nullid

533

roots = [nullid] # Everybody's a descendent of nullid

431

lowestrev = -1

534

lowestrev = -1

432

if (lowestrev == -1) and (heads is None):

535

if (lowestrev == -1) and (heads is None):

433

# We want _all_ the nodes!

536

# We want _all_ the nodes!

434

return ([self.node(r) for r in xrange(0, self.count())],

537

return ([self.node(r) for r in xrange(0, self.count())],

435

[nullid], list(self.heads()))

538

[nullid], list(self.heads()))

436

if heads is None:

539

if heads is None:

437

# All nodes are ancestors, so the latest ancestor is the last

540

# All nodes are ancestors, so the latest ancestor is the last

438

# node.

541

# node.

439

highestrev = self.count() - 1

542

highestrev = self.count() - 1

440

# Set ancestors to None to signal that every node is an ancestor.

543

# Set ancestors to None to signal that every node is an ancestor.

441

ancestors = None

544

ancestors = None

442

# Set heads to an empty dictionary for later discovery of heads

545

# Set heads to an empty dictionary for later discovery of heads

443

heads = {}

546

heads = {}

444

else:

547

else:

445

heads = list(heads)

548

heads = list(heads)

446

if not heads:

549

if not heads:

447

return nonodes

550

return nonodes

448

ancestors = {}

551

ancestors = {}

449

# Start at the top and keep marking parents until we're done.

552

# Start at the top and keep marking parents until we're done.

450

nodestotag = heads[:]

553

nodestotag = heads[:]

451

# Turn heads into a dictionary so we can remove 'fake' heads.

554

# Turn heads into a dictionary so we can remove 'fake' heads.

452

# Also, later we will be using it to filter out the heads we can't

555

# Also, later we will be using it to filter out the heads we can't

453

# find from roots.

556

# find from roots.

454

heads = dict.fromkeys(heads, 0)

557

heads = dict.fromkeys(heads, 0)

455

# Remember where the top was so we can use it as a limit later.

558

# Remember where the top was so we can use it as a limit later.

456

highestrev = max([self.rev(n) for n in nodestotag])

559

highestrev = max([self.rev(n) for n in nodestotag])

457

while nodestotag:

560

while nodestotag:

458

# grab a node to tag

561

# grab a node to tag

459

n = nodestotag.pop()

562

n = nodestotag.pop()

460

# Never tag nullid

563

# Never tag nullid

461

if n == nullid:

564

if n == nullid:

462

continue

565

continue

463

# A node's revision number represents its place in a

566

# A node's revision number represents its place in a

464

# topologically sorted list of nodes.

567

# topologically sorted list of nodes.

465

r = self.rev(n)

568

r = self.rev(n)

466

if r >= lowestrev:

569

if r >= lowestrev:

467

if n not in ancestors:

570

if n not in ancestors:

468

# If we are possibly a descendent of one of the roots

571

# If we are possibly a descendent of one of the roots

469

# and we haven't already been marked as an ancestor

572

# and we haven't already been marked as an ancestor

470

ancestors[n] = 1 # Mark as ancestor

573

ancestors[n] = 1 # Mark as ancestor

471

# Add non-nullid parents to list of nodes to tag.

574

# Add non-nullid parents to list of nodes to tag.

472

nodestotag.extend([p for p in self.parents(n) if

575

nodestotag.extend([p for p in self.parents(n) if

473

p != nullid])

576

p != nullid])

474

elif n in heads: # We've seen it before, is it a fake head?

577

elif n in heads: # We've seen it before, is it a fake head?

475

# So it is, real heads should not be the ancestors of

578

# So it is, real heads should not be the ancestors of

476

# any other heads.

579

# any other heads.

477

heads.pop(n)

580

heads.pop(n)

478

if not ancestors:

581

if not ancestors:

479

return nonodes

582

return nonodes

480

# Now that we have our set of ancestors, we want to remove any

583

# Now that we have our set of ancestors, we want to remove any

481

# roots that are not ancestors.

584

# roots that are not ancestors.

482

585

483

# If one of the roots was nullid, everything is included anyway.

586

# If one of the roots was nullid, everything is included anyway.

484

if lowestrev > -1:

587

if lowestrev > -1:

485

# But, since we weren't, let's recompute the lowest rev to not

588

# But, since we weren't, let's recompute the lowest rev to not

486

# include roots that aren't ancestors.

589

# include roots that aren't ancestors.

487

590

488

# Filter out roots that aren't ancestors of heads

591

# Filter out roots that aren't ancestors of heads

489

roots = [n for n in roots if n in ancestors]

592

roots = [n for n in roots if n in ancestors]

490

# Recompute the lowest revision

593

# Recompute the lowest revision

491

if roots:

594

if roots:

492

lowestrev = min([self.rev(n) for n in roots])

595

lowestrev = min([self.rev(n) for n in roots])

493

else:

596

else:

494

# No more roots? Return empty list

597

# No more roots? Return empty list

495

return nonodes

598

return nonodes

496

else:

599

else:

497

# We are descending from nullid, and don't need to care about

600

# We are descending from nullid, and don't need to care about

498

# any other roots.

601

# any other roots.

499

lowestrev = -1

602

lowestrev = -1

500

roots = [nullid]

603

roots = [nullid]

501

# Transform our roots list into a 'set' (i.e. a dictionary where the

604

# Transform our roots list into a 'set' (i.e. a dictionary where the

502

# values don't matter.

605

# values don't matter.

503

descendents = dict.fromkeys(roots, 1)

606

descendents = dict.fromkeys(roots, 1)

504

# Also, keep the original roots so we can filter out roots that aren't

607

# Also, keep the original roots so we can filter out roots that aren't

505

# 'real' roots (i.e. are descended from other roots).

608

# 'real' roots (i.e. are descended from other roots).

506

roots = descendents.copy()

609

roots = descendents.copy()

507

# Our topologically sorted list of output nodes.

610

# Our topologically sorted list of output nodes.

508

orderedout = []

611

orderedout = []

509

# Don't start at nullid since we don't want nullid in our output list,

612

# Don't start at nullid since we don't want nullid in our output list,

510

# and if nullid shows up in descedents, empty parents will look like

613

# and if nullid shows up in descedents, empty parents will look like

511

# they're descendents.

614

# they're descendents.

512

for r in xrange(max(lowestrev, 0), highestrev + 1):

615

for r in xrange(max(lowestrev, 0), highestrev + 1):

513

n = self.node(r)

616

n = self.node(r)

514

isdescendent = False

617

isdescendent = False

515

if lowestrev == -1: # Everybody is a descendent of nullid

618

if lowestrev == -1: # Everybody is a descendent of nullid

516

isdescendent = True

619

isdescendent = True

517

elif n in descendents:

620

elif n in descendents:

518

# n is already a descendent

621

# n is already a descendent

519

isdescendent = True

622

isdescendent = True

520

# This check only needs to be done here because all the roots

623

# This check only needs to be done here because all the roots

521

# will start being marked is descendents before the loop.

624

# will start being marked is descendents before the loop.

522

if n in roots:

625

if n in roots:

523

# If n was a root, check if it's a 'real' root.

626

# If n was a root, check if it's a 'real' root.

524

p = tuple(self.parents(n))

627

p = tuple(self.parents(n))

525

# If any of its parents are descendents, it's not a root.

628

# If any of its parents are descendents, it's not a root.

526

if (p[0] in descendents) or (p[1] in descendents):

629

if (p[0] in descendents) or (p[1] in descendents):

527

roots.pop(n)

630

roots.pop(n)

528

else:

631

else:

529

p = tuple(self.parents(n))

632

p = tuple(self.parents(n))

530

# A node is a descendent if either of its parents are

633

# A node is a descendent if either of its parents are

531

# descendents. (We seeded the dependents list with the roots

634

# descendents. (We seeded the dependents list with the roots

532

# up there, remember?)

635

# up there, remember?)

533

if (p[0] in descendents) or (p[1] in descendents):

636

if (p[0] in descendents) or (p[1] in descendents):

534

descendents[n] = 1

637

descendents[n] = 1

535

isdescendent = True

638

isdescendent = True

536

if isdescendent and ((ancestors is None) or (n in ancestors)):

639

if isdescendent and ((ancestors is None) or (n in ancestors)):

537

# Only include nodes that are both descendents and ancestors.

640

# Only include nodes that are both descendents and ancestors.

538

orderedout.append(n)

641

orderedout.append(n)

539

if (ancestors is not None) and (n in heads):

642

if (ancestors is not None) and (n in heads):

540

# We're trying to figure out which heads are reachable

643

# We're trying to figure out which heads are reachable

541

# from roots.

644

# from roots.

542

# Mark this head as having been reached

645

# Mark this head as having been reached

543

heads[n] = 1

646

heads[n] = 1

544

elif ancestors is None:

647

elif ancestors is None:

545

# Otherwise, we're trying to discover the heads.

648

# Otherwise, we're trying to discover the heads.

546

# Assume this is a head because if it isn't, the next step

649

# Assume this is a head because if it isn't, the next step

547

# will eventually remove it.

650

# will eventually remove it.

548

heads[n] = 1

651

heads[n] = 1

549

# But, obviously its parents aren't.

652

# But, obviously its parents aren't.

550

for p in self.parents(n):

653

for p in self.parents(n):

551

heads.pop(p, None)

654

heads.pop(p, None)

552

heads = [n for n in heads.iterkeys() if heads[n] != 0]

655

heads = [n for n in heads.iterkeys() if heads[n] != 0]

553

roots = roots.keys()

656

roots = roots.keys()

554

assert orderedout

657

assert orderedout

555

assert roots

658

assert roots

556

assert heads

659

assert heads

557

return (orderedout, roots, heads)

660

return (orderedout, roots, heads)

558

661

559

def heads(self, start=None):

662

def heads(self, start=None):

560

"""return the list of all nodes that have no children

663

"""return the list of all nodes that have no children

561

664

562

if start is specified, only heads that are descendants of

665

if start is specified, only heads that are descendants of

563

start will be returned

666

start will be returned

564

667

565

"""

668

"""

566

if start is None:

669

if start is None:

567

start = nullid

670

start = nullid

568

reachable = {start: 1}

671

reachable = {start: 1}

569

heads = {start: 1}

672

heads = {start: 1}

570

startrev = self.rev(start)

673

startrev = self.rev(start)

571

674

572

for r in xrange(startrev + 1, self.count()):

675

for r in xrange(startrev + 1, self.count()):

573

n = self.node(r)

676

n = self.node(r)

574

for pn in self.parents(n):

677

for pn in self.parents(n):

575

if pn in reachable:

678

if pn in reachable:

576

reachable[n] = 1

679

reachable[n] = 1

577

heads[n] = 1

680

heads[n] = 1

578

if pn in heads:

681

if pn in heads:

579

del heads[pn]

682

del heads[pn]

580

return heads.keys()

683

return heads.keys()

581

684

582

def children(self, node):

685

def children(self, node):

583

"""find the children of a given node"""

686

"""find the children of a given node"""

584

c = []

687

c = []

585

p = self.rev(node)

688

p = self.rev(node)

586

for r in range(p + 1, self.count()):

689

for r in range(p + 1, self.count()):

587

n = self.node(r)

690

n = self.node(r)

588

for pn in self.parents(n):

691

for pn in self.parents(n):

589

if pn == node:

692

if pn == node:

590

c.append(n)

693

c.append(n)

591

continue

694

continue

592

elif pn == nullid:

695

elif pn == nullid:

593

continue

696

continue

594

return c

697

return c

595

698

596

def lookup(self, id):

699

def lookup(self, id):

597

"""locate a node based on revision number or subset of hex nodeid"""

700

"""locate a node based on revision number or subset of hex nodeid"""

598

try:

701

try:

599

rev = int(id)

702

rev = int(id)

600

if str(rev) != id: raise ValueError

703

if str(rev) != id: raise ValueError

601

if rev < 0: rev = self.count() + rev

704

if rev < 0: rev = self.count() + rev

602

if rev < 0 or rev >= self.count(): raise ValueError

705

if rev < 0 or rev >= self.count(): raise ValueError

603

return self.node(rev)

706

return self.node(rev)

604

except (ValueError, OverflowError):

707

except (ValueError, OverflowError):

605

c = []

708

c = []

606

for n in self.nodemap:

709

for n in self.nodemap:

607

if hex(n).startswith(id):

710

if hex(n).startswith(id):

608

c.append(n)

711

c.append(n)

609

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

712

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

610

if len(c) < 1: raise RevlogError(_("No match found"))

713

if len(c) < 1: raise RevlogError(_("No match found"))

611

return c[0]

714

return c[0]

612

715

613

return None

716

return None

614

717

615

def diff(self, a, b):

718

def diff(self, a, b):

616

"""return a delta between two revisions"""

719

"""return a delta between two revisions"""

617

return mdiff.textdiff(a, b)

720

return mdiff.textdiff(a, b)

618

721

619

def patches(self, t, pl):

722

def patches(self, t, pl):

620

"""apply a list of patches to a string"""

723

"""apply a list of patches to a string"""

621

return mdiff.patches(t, pl)

724

return mdiff.patches(t, pl)

622

725

623

def chunk(self, rev, df=None, cachelen=4096):

726

def chunk(self, rev, df=None, cachelen=4096):

624

start, length = self.start(rev), self.length(rev)

727

start, length = self.start(rev), self.length(rev)

625

inline = self.inlinedata()

728

inline = self.inlinedata()

626

if inline:

729

if inline:

627

start += (rev + 1) * struct.calcsize(self.indexformat)

730

start += (rev + 1) * struct.calcsize(self.indexformat)

628

end = start + length

731

end = start + length

629

def loadcache(df):

732

def loadcache(df):

630

cache_length = max(cachelen, length) # 4k

733

cache_length = max(cachelen, length) # 4k

631

if not df:

734

if not df:

632

if inline:

735

if inline:

633

df = self.opener(self.indexfile)

736

df = self.opener(self.indexfile)

634

else:

737

else:

635

df = self.opener(self.datafile)

738

df = self.opener(self.datafile)

636

df.seek(start)

739

df.seek(start)

637

self.chunkcache = (start, df.read(cache_length))

740

self.chunkcache = (start, df.read(cache_length))

638

741

639

if not self.chunkcache:

742

if not self.chunkcache:

640

loadcache(df)

743

loadcache(df)

641

744

642

cache_start = self.chunkcache[0]

745

cache_start = self.chunkcache[0]

643

cache_end = cache_start + len(self.chunkcache[1])

746

cache_end = cache_start + len(self.chunkcache[1])

644

if start >= cache_start and end <= cache_end:

747

if start >= cache_start and end <= cache_end:

645

# it is cached

748

# it is cached

646

offset = start - cache_start

749

offset = start - cache_start

647

else:

750

else:

648

loadcache(df)

751

loadcache(df)

649

offset = 0

752

offset = 0

650

753

651

#def checkchunk():

754

#def checkchunk():

652

# df = self.opener(self.datafile)

755

# df = self.opener(self.datafile)

653

# df.seek(start)

756

# df.seek(start)

654

# return df.read(length)

757

# return df.read(length)

655

#assert s == checkchunk()

758

#assert s == checkchunk()

656

return decompress(self.chunkcache[1][offset:offset + length])

759

return decompress(self.chunkcache[1][offset:offset + length])

657

760

658

def delta(self, node):

761

def delta(self, node):

659

"""return or calculate a delta between a node and its predecessor"""

762

"""return or calculate a delta between a node and its predecessor"""

660

r = self.rev(node)

763

r = self.rev(node)

661

return self.revdiff(r - 1, r)

764

return self.revdiff(r - 1, r)

662

765

663

def revdiff(self, rev1, rev2):

766

def revdiff(self, rev1, rev2):

664

"""return or calculate a delta between two revisions"""

767

"""return or calculate a delta between two revisions"""

665

b1 = self.base(rev1)

768

b1 = self.base(rev1)

666

b2 = self.base(rev2)

769

b2 = self.base(rev2)

667

if b1 == b2 and rev1 + 1 == rev2:

770

if b1 == b2 and rev1 + 1 == rev2:

668

return self.chunk(rev2)

771

return self.chunk(rev2)

669

else:

772

else:

670

return self.diff(self.revision(self.node(rev1)),

773

return self.diff(self.revision(self.node(rev1)),

671

self.revision(self.node(rev2)))

774

self.revision(self.node(rev2)))

672

775

673

def revision(self, node):

776

def revision(self, node):

674

"""return an uncompressed revision of a given"""

777

"""return an uncompressed revision of a given"""

675

if node == nullid: return ""

778

if node == nullid: return ""

676

if self.cache and self.cache[0] == node: return self.cache[2]

779

if self.cache and self.cache[0] == node: return self.cache[2]

677

780

678

# look up what we need to read

781

# look up what we need to read

679

text = None

782

text = None

680

rev = self.rev(node)

783

rev = self.rev(node)

681

base = self.base(rev)

784

base = self.base(rev)

682

785

683

if self.inlinedata():

786

if self.inlinedata():

684

# we probably have the whole chunk cached

787

# we probably have the whole chunk cached

685

df = None

788

df = None

686

else:

789

else:

687

df = self.opener(self.datafile)

790

df = self.opener(self.datafile)

688

791

689

# do we have useful data cached?

792

# do we have useful data cached?

690

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

793

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

691

base = self.cache[1]

794

base = self.cache[1]

692

text = self.cache[2]

795

text = self.cache[2]

796

self.loadindex(base, rev + 1)

693

else:

797

else:

798

self.loadindex(base, rev + 1)

694

text = self.chunk(base, df=df)

799

text = self.chunk(base, df=df)

695

800

696

bins = []

801

bins = []

697

for r in xrange(base + 1, rev + 1):

802

for r in xrange(base + 1, rev + 1):

698

bins.append(self.chunk(r, df=df))

803

bins.append(self.chunk(r, df=df))

699

804

700

text = self.patches(text, bins)

805

text = self.patches(text, bins)

701

806

702

p1, p2 = self.parents(node)

807

p1, p2 = self.parents(node)

703

if node != hash(text, p1, p2):

808

if node != hash(text, p1, p2):

704

raise RevlogError(_("integrity check failed on %s:%d")

809

raise RevlogError(_("integrity check failed on %s:%d")

705

% (self.datafile, rev))

810

% (self.datafile, rev))

706

811

707

self.cache = (node, rev, text)

812

self.cache = (node, rev, text)

708

return text

813

return text

709

814

710

def checkinlinesize(self, tr, fp=None):

815

def checkinlinesize(self, tr, fp=None):

711

if not self.inlinedata():

816

if not self.inlinedata():

712

return

817

return

713

if not fp:

818

if not fp:

714

fp = self.opener(self.indexfile, 'r')

819

fp = self.opener(self.indexfile, 'r')

715

size = fp.tell()

820

size = fp.tell()

716

if size < 131072:

821

if size < 131072:

717

return

822

return

718

tr.add(self.datafile, 0)

823

tr.add(self.datafile, 0)

719

df = self.opener(self.datafile, 'w')

824

df = self.opener(self.datafile, 'w')

720

calc = struct.calcsize(self.indexformat)

825

calc = struct.calcsize(self.indexformat)

721

for r in xrange(self.count()):

826

for r in xrange(self.count()):

722

start = self.start(r) + (r + 1) * calc

827

start = self.start(r) + (r + 1) * calc

723

length = self.length(r)

828

length = self.length(r)

724

fp.seek(start)

829

fp.seek(start)

725

d = fp.read(length)

830

d = fp.read(length)

726

df.write(d)

831

df.write(d)

727

fp.close()

832

fp.close()

728

df.close()

833

df.close()

729

fp = self.opener(self.indexfile, 'w', atomictemp=True)

834

fp = self.opener(self.indexfile, 'w', atomictemp=True)

730

self.version &= ~(REVLOGNGINLINEDATA)

835

self.version &= ~(REVLOGNGINLINEDATA)

731

if self.count():

836

if self.count():

732

x = self.index[0]

837

x = self.index[0]

733

e = struct.pack(self.indexformat, *x)[4:]

838

e = struct.pack(self.indexformat, *x)[4:]

734

l = struct.pack(versionformat, self.version)

839

l = struct.pack(versionformat, self.version)

735

fp.write(l)

840

fp.write(l)

736

fp.write(e)

841

fp.write(e)

737

842

738

for i in xrange(1, self.count()):

843

for i in xrange(1, self.count()):

739

x = self.index[i]

844

x = self.index[i]

740

e = struct.pack(self.indexformat, *x)

845

e = struct.pack(self.indexformat, *x)

741

fp.write(e)

846

fp.write(e)

742

847

743

# if we don't call rename, the temp file will never replace the

848

# if we don't call rename, the temp file will never replace the

744

# real index

849

# real index

745

fp.rename()

850

fp.rename()

746

self.chunkcache = None

851

self.chunkcache = None

747

852

748

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

853

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

749

"""add a revision to the log

854

"""add a revision to the log

750

855

751

text - the revision data to add

856

text - the revision data to add

752

transaction - the transaction object used for rollback

857

transaction - the transaction object used for rollback

753

link - the linkrev data to add

858

link - the linkrev data to add

754

p1, p2 - the parent nodeids of the revision

859

p1, p2 - the parent nodeids of the revision

755

d - an optional precomputed delta

860

d - an optional precomputed delta

756

"""

861

"""

757

if text is None: text = ""

862

if text is None: text = ""

758

if p1 is None: p1 = self.tip()

863

if p1 is None: p1 = self.tip()

759

if p2 is None: p2 = nullid

864

if p2 is None: p2 = nullid

760

865

761

node = hash(text, p1, p2)

866

node = hash(text, p1, p2)

762

867

763

if node in self.nodemap:

868

if node in self.nodemap:

764

return node

869

return node

765

870

766

n = self.count()

871

n = self.count()

767

t = n - 1

872

t = n - 1

768

873

769

if n:

874

if n:

770

base = self.base(t)

875

base = self.base(t)

771

start = self.start(base)

876

start = self.start(base)

772

end = self.end(t)

877

end = self.end(t)

773

if not d:

878

if not d:

774

prev = self.revision(self.tip())

879

prev = self.revision(self.tip())

775

d = self.diff(prev, str(text))

880

d = self.diff(prev, str(text))

776

data = compress(d)

881

data = compress(d)

777

l = len(data[1]) + len(data[0])

882

l = len(data[1]) + len(data[0])

778

dist = end - start + l

883

dist = end - start + l

779

884

780

# full versions are inserted when the needed deltas

885

# full versions are inserted when the needed deltas

781

# become comparable to the uncompressed text

886

# become comparable to the uncompressed text

782

if not n or dist > len(text) * 2:

887

if not n or dist > len(text) * 2:

783

data = compress(text)

888

data = compress(text)

784

l = len(data[1]) + len(data[0])

889

l = len(data[1]) + len(data[0])

785

base = n

890

base = n

786

else:

891

else:

787

base = self.base(t)

892

base = self.base(t)

788

893

789

offset = 0

894

offset = 0

790

if t >= 0:

895

if t >= 0:

791

offset = self.end(t)

896

offset = self.end(t)

792

897

793

if self.version == 0:

898

if self.version == 0:

794

e = (offset, l, base, link, p1, p2, node)

899

e = (offset, l, base, link, p1, p2, node)

795

else:

900

else:

796

e = (self.offset_type(offset, 0), l, len(text),

901

e = (self.offset_type(offset, 0), l, len(text),

797

base, link, self.rev(p1), self.rev(p2), node)

902

base, link, self.rev(p1), self.rev(p2), node)

798

903

799

self.index.append(e)

904

self.index.append(e)

800

self.nodemap[node] = n

905

self.nodemap[node] = n

801

entry = struct.pack(self.indexformat, *e)

906

entry = struct.pack(self.indexformat, *e)

802

907

803

if not self.inlinedata():

908

if not self.inlinedata():

804

transaction.add(self.datafile, offset)

909

transaction.add(self.datafile, offset)

805

transaction.add(self.indexfile, n * len(entry))

910

transaction.add(self.indexfile, n * len(entry))

806

f = self.opener(self.datafile, "a")

911

f = self.opener(self.datafile, "a")

807

if data[0]:

912

if data[0]:

808

f.write(data[0])

913

f.write(data[0])

809

f.write(data[1])

914

f.write(data[1])

810

f = self.opener(self.indexfile, "a")

915

f = self.opener(self.indexfile, "a")

811

else:

916

else:

812

f = self.opener(self.indexfile, "a+")

917

f = self.opener(self.indexfile, "a+")

813

f.seek(0, 2)

918

f.seek(0, 2)

814

transaction.add(self.indexfile, f.tell())

919

transaction.add(self.indexfile, f.tell())

815

920

816

if len(self.index) == 1 and self.version != 0:

921

if len(self.index) == 1 and self.version != 0:

817

l = struct.pack(versionformat, self.version)

922

l = struct.pack(versionformat, self.version)

818

f.write(l)

923

f.write(l)

819

entry = entry[4:]

924

entry = entry[4:]

820

925

821

f.write(entry)

926

f.write(entry)

822

927

823

if self.inlinedata():

928

if self.inlinedata():

824

f.write(data[0])

929

f.write(data[0])

825

f.write(data[1])

930

f.write(data[1])

826

self.checkinlinesize(transaction, f)

931

self.checkinlinesize(transaction, f)

827

932

828

self.cache = (node, n, text)

933

self.cache = (node, n, text)

829

return node

934

return node

830

935

831

def ancestor(self, a, b):

936

def ancestor(self, a, b):

832

"""calculate the least common ancestor of nodes a and b"""

937

"""calculate the least common ancestor of nodes a and b"""

833

# calculate the distance of every node from root

938

# calculate the distance of every node from root

834

dist = {nullid: 0}

939

dist = {nullid: 0}

835

for i in xrange(self.count()):

940

for i in xrange(self.count()):

836

n = self.node(i)

941

n = self.node(i)

837

p1, p2 = self.parents(n)

942

p1, p2 = self.parents(n)

838

dist[n] = max(dist[p1], dist[p2]) + 1

943

dist[n] = max(dist[p1], dist[p2]) + 1

839

944

840

# traverse ancestors in order of decreasing distance from root

945

# traverse ancestors in order of decreasing distance from root

841

def ancestors(node):

946

def ancestors(node):

842

# we store negative distances because heap returns smallest member

947

# we store negative distances because heap returns smallest member

843

h = [(-dist[node], node)]

948

h = [(-dist[node], node)]

844

seen = {}

949

seen = {}

845

while h:

950

while h:

846

d, n = heapq.heappop(h)

951

d, n = heapq.heappop(h)

847

if n not in seen:

952

if n not in seen:

848

seen[n] = 1

953

seen[n] = 1

849

yield (-d, n)

954

yield (-d, n)

850

for p in self.parents(n):

955

for p in self.parents(n):

851

heapq.heappush(h, (-dist[p], p))

956

heapq.heappush(h, (-dist[p], p))

852

957

853

def generations(node):

958

def generations(node):

854

sg, s = None, {}

959

sg, s = None, {}

855

for g,n in ancestors(node):

960

for g,n in ancestors(node):

856

if g != sg:

961

if g != sg:

857

if sg:

962

if sg:

858

yield sg, s

963

yield sg, s

859

sg, s = g, {n:1}

964

sg, s = g, {n:1}

860

else:

965

else:

861

s[n] = 1

966

s[n] = 1

862

yield sg, s

967

yield sg, s

863

968

864

x = generations(a)

969

x = generations(a)

865

y = generations(b)

970

y = generations(b)

866

gx = x.next()

971

gx = x.next()

867

gy = y.next()

972

gy = y.next()

868

973

869

# increment each ancestor list until it is closer to root than

974

# increment each ancestor list until it is closer to root than

870

# the other, or they match

975

# the other, or they match

871

while 1:

976

while 1:

872

#print "ancestor gen %s %s" % (gx[0], gy[0])

977

#print "ancestor gen %s %s" % (gx[0], gy[0])

873

if gx[0] == gy[0]:

978

if gx[0] == gy[0]:

874

# find the intersection

979

# find the intersection

875

i = [ n for n in gx[1] if n in gy[1] ]

980

i = [ n for n in gx[1] if n in gy[1] ]

876

if i:

981

if i:

877

return i[0]

982

return i[0]

878

else:

983

else:

879

#print "next"

984

#print "next"

880

gy = y.next()

985

gy = y.next()

881

gx = x.next()

986

gx = x.next()

882

elif gx[0] < gy[0]:

987

elif gx[0] < gy[0]:

883

#print "next y"

988

#print "next y"

884

gy = y.next()

989

gy = y.next()

885

else:

990

else:

886

#print "next x"

991

#print "next x"

887

gx = x.next()

992

gx = x.next()

888

993

889

def group(self, nodelist, lookup, infocollect=None):

994

def group(self, nodelist, lookup, infocollect=None):

890

"""calculate a delta group

995

"""calculate a delta group

891

996

892

Given a list of changeset revs, return a set of deltas and

997

Given a list of changeset revs, return a set of deltas and

893

metadata corresponding to nodes. the first delta is

998

metadata corresponding to nodes. the first delta is

894

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

999

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

895

have this parent as it has all history before these

1000

have this parent as it has all history before these

896

changesets. parent is parent[0]

1001

changesets. parent is parent[0]

897

"""

1002

"""

898

revs = [self.rev(n) for n in nodelist]

1003

revs = [self.rev(n) for n in nodelist]

899

1004

900

# if we don't have any revisions touched by these changesets, bail

1005

# if we don't have any revisions touched by these changesets, bail

901

if not revs:

1006

if not revs:

902

yield changegroup.closechunk()

1007

yield changegroup.closechunk()

903

return

1008

return

904

1009

905

# add the parent of the first rev

1010

# add the parent of the first rev

906

p = self.parents(self.node(revs[0]))[0]

1011

p = self.parents(self.node(revs[0]))[0]

907

revs.insert(0, self.rev(p))

1012

revs.insert(0, self.rev(p))

908

1013

909

# build deltas

1014

# build deltas

910

for d in xrange(0, len(revs) - 1):

1015

for d in xrange(0, len(revs) - 1):

911

a, b = revs[d], revs[d + 1]

1016

a, b = revs[d], revs[d + 1]

912

nb = self.node(b)

1017

nb = self.node(b)

913

1018

914

if infocollect is not None:

1019

if infocollect is not None:

915

infocollect(nb)

1020

infocollect(nb)

916

1021

917

d = self.revdiff(a, b)

1022

d = self.revdiff(a, b)

918

p = self.parents(nb)

1023

p = self.parents(nb)

919

meta = nb + p[0] + p[1] + lookup(nb)

1024

meta = nb + p[0] + p[1] + lookup(nb)

920

yield changegroup.genchunk("%s%s" % (meta, d))

1025

yield changegroup.genchunk("%s%s" % (meta, d))

921

1026

922

yield changegroup.closechunk()

1027

yield changegroup.closechunk()

923

1028

924

def addgroup(self, revs, linkmapper, transaction, unique=0):

1029

def addgroup(self, revs, linkmapper, transaction, unique=0):

925

"""

1030

"""

926

add a delta group

1031

add a delta group

927

1032

928

given a set of deltas, add them to the revision log. the

1033

given a set of deltas, add them to the revision log. the

929

first delta is against its parent, which should be in our

1034

first delta is against its parent, which should be in our

930

log, the rest are against the previous delta.

1035

log, the rest are against the previous delta.

931

"""

1036

"""

932

1037

933

#track the base of the current delta log

1038

#track the base of the current delta log

934

r = self.count()

1039

r = self.count()

935

t = r - 1

1040

t = r - 1

936

node = None

1041

node = None

937

1042

938

base = prev = -1

1043

base = prev = -1

939

start = end = textlen = 0

1044

start = end = textlen = 0

940

if r:

1045

if r:

941

end = self.end(t)

1046

end = self.end(t)

942

1047

943

ifh = self.opener(self.indexfile, "a+")

1048

ifh = self.opener(self.indexfile, "a+")

944

ifh.seek(0, 2)

1049

ifh.seek(0, 2)

945

transaction.add(self.indexfile, ifh.tell())

1050

transaction.add(self.indexfile, ifh.tell())

946

if self.inlinedata():

1051

if self.inlinedata():

947

dfh = None

1052

dfh = None

948

else:

1053

else:

949

transaction.add(self.datafile, end)

1054

transaction.add(self.datafile, end)

950

dfh = self.opener(self.datafile, "a")

1055

dfh = self.opener(self.datafile, "a")

951

1056

952

# loop through our set of deltas

1057

# loop through our set of deltas

953

chain = None

1058

chain = None

954

for chunk in revs:

1059

for chunk in revs:

955

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

1060

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

956

link = linkmapper(cs)

1061

link = linkmapper(cs)

957

if node in self.nodemap:

1062

if node in self.nodemap:

958

# this can happen if two branches make the same change

1063

# this can happen if two branches make the same change

959

# if unique:

1064

# if unique:

960

# raise RevlogError(_("already have %s") % hex(node[:4]))

1065

# raise RevlogError(_("already have %s") % hex(node[:4]))

961

chain = node

1066

chain = node

962

continue

1067

continue

963

delta = chunk[80:]

1068

delta = chunk[80:]

964

1069

965

for p in (p1, p2):

1070

for p in (p1, p2):

966

if not p in self.nodemap:

1071

if not p in self.nodemap:

967

raise RevlogError(_("unknown parent %s") % short(p1))

1072

raise RevlogError(_("unknown parent %s") % short(p1))

968

1073

969

if not chain:

1074

if not chain:

970

# retrieve the parent revision of the delta chain

1075

# retrieve the parent revision of the delta chain

971

chain = p1

1076

chain = p1

972

if not chain in self.nodemap:

1077

if not chain in self.nodemap:

973

raise RevlogError(_("unknown base %s") % short(chain[:4]))

1078

raise RevlogError(_("unknown base %s") % short(chain[:4]))

974

1079

975

# full versions are inserted when the needed deltas become

1080

# full versions are inserted when the needed deltas become

976

# comparable to the uncompressed text or when the previous

1081

# comparable to the uncompressed text or when the previous

977

# version is not the one we have a delta against. We use

1082

# version is not the one we have a delta against. We use

978

# the size of the previous full rev as a proxy for the

1083

# the size of the previous full rev as a proxy for the

979

# current size.

1084

# current size.

980

1085

981

if chain == prev:

1086

if chain == prev:

982

tempd = compress(delta)

1087

tempd = compress(delta)

983

cdelta = tempd[0] + tempd[1]

1088

cdelta = tempd[0] + tempd[1]

984

textlen = mdiff.patchedsize(textlen, delta)

1089

textlen = mdiff.patchedsize(textlen, delta)

985

1090

986

if chain != prev or (end - start + len(cdelta)) > textlen * 2:

1091

if chain != prev or (end - start + len(cdelta)) > textlen * 2:

987

# flush our writes here so we can read it in revision

1092

# flush our writes here so we can read it in revision

988

if dfh:

1093

if dfh:

989

dfh.flush()

1094

dfh.flush()

990

ifh.flush()

1095

ifh.flush()

991

text = self.revision(chain)

1096

text = self.revision(chain)

992

text = self.patches(text, [delta])

1097

text = self.patches(text, [delta])

993

chk = self.addrevision(text, transaction, link, p1, p2)

1098

chk = self.addrevision(text, transaction, link, p1, p2)

994

if chk != node:

1099

if chk != node:

995

raise RevlogError(_("consistency error adding group"))

1100

raise RevlogError(_("consistency error adding group"))

996

textlen = len(text)

1101

textlen = len(text)

997

else:

1102

else:

998

if self.version == 0:

1103

if self.version == 0:

999

e = (end, len(cdelta), base, link, p1, p2, node)

1104

e = (end, len(cdelta), base, link, p1, p2, node)

1000

else:

1105

else:

1001

e = (self.offset_type(end, 0), len(cdelta), textlen, base,

1106

e = (self.offset_type(end, 0), len(cdelta), textlen, base,

1002

link, self.rev(p1), self.rev(p2), node)

1107

link, self.rev(p1), self.rev(p2), node)

1003

self.index.append(e)

1108

self.index.append(e)

1004

self.nodemap[node] = r

1109

self.nodemap[node] = r

1005

if self.inlinedata():

1110

if self.inlinedata():

1006

ifh.write(struct.pack(self.indexformat, *e))

1111

ifh.write(struct.pack(self.indexformat, *e))

1007

ifh.write(cdelta)

1112

ifh.write(cdelta)

1008

self.checkinlinesize(transaction, ifh)

1113

self.checkinlinesize(transaction, ifh)

1009

if not self.inlinedata():

1114

if not self.inlinedata():

1010

dfh = self.opener(self.datafile, "a")

1115

dfh = self.opener(self.datafile, "a")

1011

ifh = self.opener(self.indexfile, "a")

1116

ifh = self.opener(self.indexfile, "a")

1012

else:

1117

else:

1013

if not dfh:

1118

if not dfh:

1014

# addrevision switched from inline to conventional

1119

# addrevision switched from inline to conventional

1015

# reopen the index

1120

# reopen the index

1016

dfh = self.opener(self.datafile, "a")

1121

dfh = self.opener(self.datafile, "a")

1017

ifh = self.opener(self.indexfile, "a")

1122

ifh = self.opener(self.indexfile, "a")

1018

dfh.write(cdelta)

1123

dfh.write(cdelta)

1019

ifh.write(struct.pack(self.indexformat, *e))

1124

ifh.write(struct.pack(self.indexformat, *e))

1020

1125

1021

t, r, chain, prev = r, r + 1, node, node

1126

t, r, chain, prev = r, r + 1, node, node

1022

base = self.base(t)

1127

base = self.base(t)

1023

start = self.start(base)

1128

start = self.start(base)

1024

end = self.end(t)

1129

end = self.end(t)

1025

1130

1026

if node is None:

1131

if node is None:

1027

raise RevlogError(_("group to be added is empty"))

1132

raise RevlogError(_("group to be added is empty"))

1028

return node

1133

return node

1029

1134

1030

def strip(self, rev, minlink):

1135

def strip(self, rev, minlink):

1031

if self.count() == 0 or rev >= self.count():

1136

if self.count() == 0 or rev >= self.count():

1032

return

1137

return

1033

1138

1034

if isinstance(self.index, lazyindex):

1139

if isinstance(self.index, lazyindex):

1035

self.loadindexmap()

1140

self.loadindexmap()

1036

1141

1037

# When stripping away a revision, we need to make sure it

1142

# When stripping away a revision, we need to make sure it

1038

# does not actually belong to an older changeset.

1143

# does not actually belong to an older changeset.

1039

# The minlink parameter defines the oldest revision

1144

# The minlink parameter defines the oldest revision

1040

# we're allowed to strip away.

1145

# we're allowed to strip away.

1041

while minlink > self.index[rev][-4]:

1146

while minlink > self.index[rev][-4]:

1042

rev += 1

1147

rev += 1

1043

if rev >= self.count():

1148

if rev >= self.count():

1044

return

1149

return

1045

1150

1046

# first truncate the files on disk

1151

# first truncate the files on disk

1047

end = self.start(rev)

1152

end = self.start(rev)

1048

if not self.inlinedata():

1153

if not self.inlinedata():

1049

df = self.opener(self.datafile, "a")

1154

df = self.opener(self.datafile, "a")

1050

df.truncate(end)

1155

df.truncate(end)

1051

end = rev * struct.calcsize(self.indexformat)

1156

end = rev * struct.calcsize(self.indexformat)

1052

else:

1157

else:

1053

end += rev * struct.calcsize(self.indexformat)

1158

end += rev * struct.calcsize(self.indexformat)

1054

1159

1055

indexf = self.opener(self.indexfile, "a")

1160

indexf = self.opener(self.indexfile, "a")

1056

indexf.truncate(end)

1161

indexf.truncate(end)

1057

1162

1058

# then reset internal state in memory to forget those revisions

1163

# then reset internal state in memory to forget those revisions

1059

self.cache = None

1164

self.cache = None

1060

self.chunkcache = None

1165

self.chunkcache = None

1061

for x in xrange(rev, self.count()):

1166

for x in xrange(rev, self.count()):

1062

del self.nodemap[self.node(x)]

1167

del self.nodemap[self.node(x)]

1063

1168

1064

del self.index[rev:]

1169

del self.index[rev:]

1065

1170

1066

def checksize(self):

1171

def checksize(self):

1067

expected = 0

1172

expected = 0

1068

if self.count():

1173

if self.count():

1069

expected = self.end(self.count() - 1)

1174

expected = self.end(self.count() - 1)

1070

1175

1071

try:

1176

try:

1072

f = self.opener(self.datafile)

1177

f = self.opener(self.datafile)

1073

f.seek(0, 2)

1178

f.seek(0, 2)

1074

actual = f.tell()

1179

actual = f.tell()

1075

dd = actual - expected

1180

dd = actual - expected

1076

except IOError, inst:

1181

except IOError, inst:

1077

if inst.errno != errno.ENOENT:

1182

if inst.errno != errno.ENOENT:

1078

raise

1183

raise

1079

dd = 0

1184

dd = 0

1080

1185

1081

try:

1186

try:

1082

f = self.opener(self.indexfile)

1187

f = self.opener(self.indexfile)

1083

f.seek(0, 2)

1188

f.seek(0, 2)

1084

actual = f.tell()

1189

actual = f.tell()

1085

s = struct.calcsize(self.indexformat)

1190

s = struct.calcsize(self.indexformat)

1086

i = actual / s

1191

i = actual / s

1087

di = actual - (i * s)

1192

di = actual - (i * s)

1088

if self.inlinedata():

1193

if self.inlinedata():

1089

databytes = 0

1194

databytes = 0

1090

for r in xrange(self.count()):

1195

for r in xrange(self.count()):

1091

databytes += self.length(r)

1196

databytes += self.length(r)

1092

dd = 0

1197

dd = 0

1093

di = actual - self.count() * s - databytes

1198

di = actual - self.count() * s - databytes

1094

except IOError, inst:

1199

except IOError, inst:

1095

if inst.errno != errno.ENOENT:

1200

if inst.errno != errno.ENOENT:

1096

raise

1201

raise

1097

di = 0

1202

di = 0

1098

1203

1099

return (dd, di)

1204

return (dd, di)

1100

1205

1101

1206

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """
             revlog.py - storage back-end for mercurial
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches
             Copyright 2005 Matt Mackall <mpm@selenic.com>
             This software may be used and distributed according to the terms
             of the GNU General Public License, incorporated herein by reference.
             """
             from node import *
             from i18n import gettext as _
             from demandload import demandload
             demandload(globals(), "binascii changegroup errno heapq mdiff os")
             demandload(globals(), "sha struct zlib")
             # revlog version strings
             REVLOGV0 = 0
             REVLOGNG = 1
             # revlog flags
             REVLOGNGINLINEDATA = (1 << 16)
             def flagstr(flag):
                 if flag == "inline":
                     return REVLOGNGINLINEDATA
                 raise RevlogError(_("unknown revlog flag %s" % flag))
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 l = [p1, p2]
                 l.sort()
                 s = sha.new(l[0])
                 s.update(l[1])
                 s.update(text)
                 return s.digest()
             def compress(text):
                 """ generate a possibly-compressed representation of text """
                 if not text: return ("", text)
                 if len(text) < 44:
                     if text[0] == '\0': return ("", text)
                     return ('u', text)
                 bin = zlib.compress(text)
                 if len(bin) > len(text):
                     if text[0] == '\0': return ("", text)
                     return ('u', text)
                 return ("", bin)
             def decompress(bin):
                 """ decompress the given input """
                 if not bin: return bin
                 t = bin[0]
                 if t == '\0': return bin
                 if t == 'x': return zlib.decompress(bin)
                 if t == 'u': return bin[1:]
                 raise RevlogError(_("unknown compression type %r") % t)
             indexformatv0 = ">4l20s20s20s"
+            v0shaoffset = 56
             # index ng:
             # 6 bytes offset
             # 2 bytes flags
             # 4 bytes compressed length
             # 4 bytes uncompressed length
             # 4 bytes: base rev
             # 4 bytes link rev
             # 4 bytes parent 1 rev
             # 4 bytes parent 2 rev
             # 32 bytes: nodeid
             indexformatng = ">Qiiiiii20s12x"
+            ngshaoffset = 32
             versionformat = ">i"
             class lazyparser(object):
                 """
                 this class avoids the need to parse the entirety of large indices
-                By default we parse and load 1000 entries at a time.
-                If no position is specified, we load the whole index, and replace
-                the lazy objects in revlog with the underlying objects for
-                efficiency in cases where we look at most of the nodes.
                 """
-                def __init__(self, data, revlog, indexformat):
+                def __init__(self, dataf, size, indexformat, shaoffset):
-                    self.data = data
+                    self.dataf = dataf
+                    self.format = indexformat
                     self.s = struct.calcsize(indexformat)
                     self.indexformat = indexformat
-                    self.l = len(data)/self.s
+                    self.datasize = size
+                    self.l = size/self.s
                     self.index = [None] * self.l
                     self.map = {nullid: -1}
+                    self.allmap = 0
                     self.all = 0
-                    self.revlog = revlog
+                    self.mapfind_count = 0
+                    self.shaoffset = shaoffset
-                def load(self, pos=None):
+                def loadmap(self):
+                    """
+                    during a commit, we need to make sure the rev being added is
+                    not a duplicate.  This requires loading the entire index,
+                    which is fairly slow.  loadmap can load up just the node map,
+                    which takes much less time.
+                    """
+                    if self.allmap: return
+                    start = 0
+                    end = self.datasize
+                    self.allmap = 1
+                    cur = 0
+                    count = 0
+                    blocksize = self.s * 256
+                    self.dataf.seek(0)
+                    while cur < end:
+                        data = self.dataf.read(blocksize)
+                        off = 0
+                        for x in xrange(256):
+                            n = data[off + self.shaoffset:off + self.shaoffset + 20]
+                            self.map[n] = count
+                            count += 1
+                            if count >= self.l:
+                                break
+                            off += self.s
+                        cur += blocksize
+                def loadblock(self, blockstart, blocksize, data=None):
                     if self.all: return
-                    if pos is not None:
+                    if data is None:
-                        block = pos / 1000
+                        self.dataf.seek(blockstart)
-                        i = block * 1000
+                        data = self.dataf.read(blocksize)
-                        end = min(self.l, i + 1000)
+                    lend = len(data) / self.s
-                    else:
+                    i = blockstart / self.s
+                    off = 0
+                    for x in xrange(lend):
+                        if self.index[i + x] == None:
+                            b = data[off : off + self.s]
+                            e = struct.unpack(self.format, b)
+                            self.index[i + x] = e
+                            self.map[e[-1]] = i + x
+                        off += self.s
+                def findnode(self, node):
+                    """search backwards through the index file for a specific node"""
+                    if self.allmap: return None
+                    # hg log will cause many many searches for the manifest
+                    # nodes.  After we get called a few times, just load the whole
+                    # thing.
+                    if self.mapfind_count > 8:
+                        self.loadmap()
+                        if node in self.map:
+                            return node
+                        return None
+                    self.mapfind_count += 1
+                    last = self.l - 1
+                    while self.index[last] != None:
+                        if last == 0:
                             self.all = 1
-                        i = 0
+                            self.allmap = 1
-                        end = self.l
+                            return None
-                        self.revlog.index = self.index
+                        last -= 1
-                        self.revlog.nodemap = self.map
+                    end = (last + 1) * self.s
+                    blocksize = self.s * 256
+                    while end >= 0:
+                        start = max(end - blocksize, 0)
+                        self.dataf.seek(start)
+                        data = self.dataf.read(end - start)
+                        findend = end - start
+                        while True:
+                            # we're searching backwards, so weh have to make sure
+                            # we don't find a changeset where this node is a parent
+                            off = data.rfind(node, 0, findend)
+                            findend = off
+                            if off >= 0:
+                                i = off / self.s
+                                off = i * self.s
+                                n = data[off + self.shaoffset:off + self.shaoffset + 20]
+                                if n == node:
+                                    self.map[n] = i + start / self.s
+                                    return node
+                            else:
+                                break
+                        end -= blocksize
+                    return None
-                    while i < end:
+                def loadindex(self, i=None, end=None):
-                        if not self.index[i]:
+                    if self.all: return
-                            d = self.data[i * self.s: (i + 1) * self.s]
+                    all = False
-                            e = struct.unpack(self.indexformat, d)
+                    if i == None:
-                            self.index[i] = e
+                        blockstart = 0
-                            self.map[e[-1]] = i
+                        blocksize = (512 / self.s) * self.s
-                        i += 1
+                        end = self.datasize
+                        all = True
+                    else:
+                        if end:
+                            blockstart = i * self.s
+                            end = end * self.s
+                            blocksize = end - blockstart
+                        else:
+                            blockstart = (i & ~(32)) * self.s
+                            blocksize = self.s * 64
+                            end = blockstart + blocksize
+                    while blockstart < end:
+                        self.loadblock(blockstart, blocksize)
+                        blockstart += blocksize
+                    if all: self.all = True
             class lazyindex(object):
                 """a lazy version of the index array"""
                 def __init__(self, parser):
                     self.p = parser
                 def __len__(self):
                     return len(self.p.index)
                 def load(self, pos):
                     if pos < 0:
                         pos += len(self.p.index)
-                    self.p.load(pos)
+                    self.p.loadindex(pos)
                     return self.p.index[pos]
                 def __getitem__(self, pos):
                     return self.p.index[pos] or self.load(pos)
                 def __setitem__(self, pos, item):
                     self.p.index[pos] = item
                 def __delitem__(self, pos):
                     del self.p.index[pos]
                 def append(self, e):
                     self.p.index.append(e)
             class lazymap(object):
                 """a lazy version of the node map"""
                 def __init__(self, parser):
                     self.p = parser
                 def load(self, key):
-                    if self.p.all: return
+                    n = self.p.findnode(key)
-                    n = self.p.data.find(key)
+                    if n == None:
-                    if n < 0:
                         raise KeyError(key)
-                    pos = n / self.p.s
-                    self.p.load(pos)
                 def __contains__(self, key):
-                    self.p.load()
+                    if key in self.p.map:
+                        return True
+                    self.p.loadmap()
                     return key in self.p.map
                 def __iter__(self):
                     yield nullid
                     for i in xrange(self.p.l):
                         try:
                             yield self.p.index[i][-1]
                         except:
-                            self.p.load(i)
+                            self.p.loadindex(i)
                             yield self.p.index[i][-1]
                 def __getitem__(self, key):
                     try:
                         return self.p.map[key]
                     except KeyError:
                         try:
                             self.load(key)
                             return self.p.map[key]
                         except KeyError:
                             raise KeyError("node " + hex(key))
                 def __setitem__(self, key, val):
                     self.p.map[key] = val
                 def __delitem__(self, key):
                     del self.p.map[key]
             class RevlogError(Exception): pass
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, includings its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 """
                 def __init__(self, opener, indexfile, datafile, defversion=0):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = datafile
                     self.opener = opener
                     self.indexstat = None
                     self.cache = None
                     self.chunkcache = None
                     self.defversion = defversion
                     self.load()
                 def load(self):
                     v = self.defversion
                     try:
                         f = self.opener(self.indexfile)
-                        i = f.read()
+                        i = f.read(4)
+                        f.seek(0)
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         i = ""
                     else:
                         try:
                             st = os.fstat(f.fileno())
                         except AttributeError, inst:
                             st = None
                         else:
                             oldst = self.indexstat
                             if (oldst and st.st_dev == oldst.st_dev
                                 and st.st_ino == oldst.st_ino
                                 and st.st_mtime == oldst.st_mtime
                                 and st.st_ctime == oldst.st_ctime):
                                 return
                             self.indexstat = st
                             if len(i) > 0:
-                                v = struct.unpack(versionformat, i[:4])[0]
+                                v = struct.unpack(versionformat, i)[0]
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == 0:
                         if flags:
                             raise RevlogError(_("index %s invalid flags %x for format v0" %
                                                (self.indexfile, flags)))
                     elif fmt == REVLOGNG:
                         if flags & ~REVLOGNGINLINEDATA:
                             raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                                (self.indexfile, flags)))
                     else:
                         raise RevlogError(_("index %s invalid format %d" %
                                            (self.indexfile, fmt)))
                     self.version = v
                     if v == 0:
                         self.indexformat = indexformatv0
+                        shaoffset = v0shaoffset
                     else:
                         self.indexformat = indexformatng
+                        shaoffset = ngshaoffset
                     if i:
                         if not self.inlinedata() and st and st.st_size > 10000:
                             # big index, let's parse it on demand
-                            parser = lazyparser(i, self, self.indexformat)
+                            parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                             self.index = lazyindex(parser)
                             self.nodemap = lazymap(parser)
                         else:
+                            i = f.read()
                             self.parseindex(i)
                         if self.inlinedata():
                             # we've already got the entire data file read in, save it
                             # in the chunk data
                             self.chunkcache = (0, i)
                         if self.version != 0:
                             e = list(self.index[0])
                             type = self.ngtype(e[0])
                             e[0] = self.offset_type(0, type)
                             self.index[0] = e
                     else:
                         self.nodemap = { nullid: -1}
                         self.index = []
                 def parseindex(self, data):
                     s = struct.calcsize(self.indexformat)
                     l = len(data)
                     self.index = []
                     self.nodemap =  {nullid: -1}
                     inline = self.inlinedata()
                     off = 0
                     n = 0
                     while off < l:
                         e = struct.unpack(self.indexformat, data[off:off + s])
                         self.index.append(e)
                         self.nodemap[e[-1]] = n
                         n += 1
                         off += s
                         if inline:
                             off += e[1]
                 def ngoffset(self, q):
                     if q & 0xFFFF:
                         raise RevlogError(_('%s: incompatible revision flag %x') %
                                            (self.indexfile, type))
                     return long(q >> 16)
                 def ngtype(self, q):
                     return int(q & 0xFFFF)
                 def offset_type(self, offset, type):
                     return long(long(offset) << 16 | type)
+                def loadindex(self, start, end):
+                    """load a block of indexes all at once from the lazy parser"""
+                    if isinstance(self.index, lazyindex):
+                        self.index.p.loadindex(start, end)
                 def loadindexmap(self):
                     """loads both the map and the index from the lazy parser"""
                     if isinstance(self.index, lazyindex):
                         p = self.index.p
-                        p.load()
+                        p.loadindex()
+                        self.nodemap = p.map
+                def loadmap(self):
+                    """loads the map from the lazy parser"""
+                    if isinstance(self.nodemap, lazymap):
+                        self.nodemap.p.loadmap()
+                        self.nodemap = self.nodemap.p.map
                 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
                 def tip(self): return self.node(len(self.index) - 1)
                 def count(self): return len(self.index)
                 def node(self, rev):
                     return (rev < 0) and nullid or self.index[rev][-1]
                 def rev(self, node):
                     try:
                         return self.nodemap[node]
                     except KeyError:
                         raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
                 def linkrev(self, node): return self.index[self.rev(node)][-4]
                 def parents(self, node):
                     if node == nullid: return (nullid, nullid)
                     r = self.rev(node)
                     d = self.index[r][-3:-1]
                     if self.version == 0:
                         return d
                     return [ self.node(x) for x in d ]
                 def start(self, rev):
                     if rev < 0:
                         return -1
                     if self.version != 0:
                         return self.ngoffset(self.index[rev][0])
                     return self.index[rev][0]
                 def end(self, rev): return self.start(rev) + self.length(rev)
                 def size(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     l = -1
                     if self.version != 0:
                         l = self.index[rev][2]
                     if l >= 0:
                         return l
                     t = self.revision(self.node(rev))
                     return len(t)
                     # alternate implementation, The advantage to this code is it
                     # will be faster for a single revision.  But, the results are not
                     # cached, so finding the size of every revision will be slower.
                     """
                     if self.cache and self.cache[1] == rev:
                         return len(self.cache[2])
                     base = self.base(rev)
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         base = self.cache[1]
                         text = self.cache[2]
                     else:
                         text = self.revision(self.node(base))
                     l = len(text)
                     for x in xrange(base + 1, rev + 1):
                         l = mdiff.patchedsize(l, self.chunk(x))
                     return l
                     """
                 def length(self, rev):
                     if rev < 0:
                         return 0
                     else:
                         return self.index[rev][1]
                 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
                 def reachable(self, rev, stop=None):
                     reachable = {}
                     visit = [rev]
                     reachable[rev] = 1
                     if stop:
                         stopn = self.rev(stop)
                     else:
                         stopn = 0
                     while visit:
                         n = visit.pop(0)
                         if n == stop:
                             continue
                         if n == nullid:
                             continue
                         for p in self.parents(n):
                             if self.rev(p) < stopn:
                                 continue
                             if p not in reachable:
                                 reachable[p] = 1
                                 visit.append(p)
                     return reachable
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a tuple containing three elements. Elements 1 and 2 contain
                     a final list bases and heads after all the unreachable ones have been
                     pruned.  Element 0 contains a topologically sorted list of all
                     nodes that satisfy these constraints:
 . All nodes must be descended from a node in roots (the nodes on
                        roots are considered descended from themselves).
 . All nodes must also be ancestors of a node in heads (the nodes in
                        heads are considered to be their own ancestors).
                     If roots is unspecified, nullid is assumed as the only root.
                     If heads is unspecified, it is taken to be the output of the
                     heads method (i.e. a list of all nodes in the repository that
                     have no children)."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendent of nullid
                         lowestrev = -1
                     if (lowestrev == -1) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in xrange(0, self.count())],
                                 [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = self.count() - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = {}
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = heads[:]
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, 0)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendent of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors[n] = 1 # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.extend([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > -1:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [n for n in roots if n in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(n) for n in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = -1
                             roots = [nullid]
                     # Transform our roots list into a 'set' (i.e. a dictionary where the
                     # values don't matter.
                     descendents = dict.fromkeys(roots, 1)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendents.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descedents, empty parents will look like
                     # they're descendents.
                     for r in xrange(max(lowestrev, 0), highestrev + 1):
                         n = self.node(r)
                         isdescendent = False
                         if lowestrev == -1:  # Everybody is a descendent of nullid
                             isdescendent = True
                         elif n in descendents:
                             # n is already a descendent
                             isdescendent = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendents before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendents, it's not a root.
                                 if (p[0] in descendents) or (p[1] in descendents):
                                     roots.pop(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendent if either of its parents are
                             # descendents.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendents) or (p[1] in descendents):
                                 descendents[n] = 1
                                 isdescendent = True
                         if isdescendent and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendents and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = 1
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = 1
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [n for n in heads.iterkeys() if heads[n] != 0]
                     roots = roots.keys()
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def heads(self, start=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     """
                     if start is None:
                         start = nullid
                     reachable = {start: 1}
                     heads = {start: 1}
                     startrev = self.rev(start)
                     for r in xrange(startrev + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn in reachable:
                                 reachable[n] = 1
                                 heads[n] = 1
                             if pn in heads:
                                 del heads[pn]
                     return heads.keys()
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in range(p + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn == node:
                                 c.append(n)
                                 continue
                             elif pn == nullid:
                                 continue
                     return c
                 def lookup(self, id):
                     """locate a node based on revision number or subset of hex nodeid"""
                     try:
                         rev = int(id)
                         if str(rev) != id: raise ValueError
                         if rev < 0: rev = self.count() + rev
                         if rev < 0 or rev >= self.count(): raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         c = []
                         for n in self.nodemap:
                             if hex(n).startswith(id):
                                 c.append(n)
                         if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
                         if len(c) < 1: raise RevlogError(_("No match found"))
                         return c[0]
                     return None
                 def diff(self, a, b):
                     """return a delta between two revisions"""
                     return mdiff.textdiff(a, b)
                 def patches(self, t, pl):
                     """apply a list of patches to a string"""
                     return mdiff.patches(t, pl)
                 def chunk(self, rev, df=None, cachelen=4096):
                     start, length = self.start(rev), self.length(rev)
                     inline = self.inlinedata()
                     if inline:
                         start += (rev + 1) * struct.calcsize(self.indexformat)
                     end = start + length
                     def loadcache(df):
                         cache_length = max(cachelen, length) # 4k
                         if not df:
                             if inline:
                                 df = self.opener(self.indexfile)
                             else:
                                 df = self.opener(self.datafile)
                         df.seek(start)
                         self.chunkcache = (start, df.read(cache_length))
                     if not self.chunkcache:
                         loadcache(df)
                     cache_start = self.chunkcache[0]
                     cache_end = cache_start + len(self.chunkcache[1])
                     if start >= cache_start and end <= cache_end:
                         # it is cached
                         offset = start - cache_start
                     else:
                         loadcache(df)
                         offset = 0
                     #def checkchunk():
                     #    df = self.opener(self.datafile)
                     #    df.seek(start)
                     #    return df.read(length)
                     #assert s == checkchunk()
                     return decompress(self.chunkcache[1][offset:offset + length])
                 def delta(self, node):
                     """return or calculate a delta between a node and its predecessor"""
                     r = self.rev(node)
                     return self.revdiff(r - 1, r)
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions"""
                     b1 = self.base(rev1)
                     b2 = self.base(rev2)
                     if b1 == b2 and rev1 + 1 == rev2:
                         return self.chunk(rev2)
                     else:
                         return self.diff(self.revision(self.node(rev1)),
                                          self.revision(self.node(rev2)))
                 def revision(self, node):
                     """return an uncompressed revision of a given"""
                     if node == nullid: return ""
                     if self.cache and self.cache[0] == node: return self.cache[2]
                     # look up what we need to read
                     text = None
                     rev = self.rev(node)
                     base = self.base(rev)
                     if self.inlinedata():
                         # we probably have the whole chunk cached
                         df = None
                     else:
                         df = self.opener(self.datafile)
                     # do we have useful data cached?
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         base = self.cache[1]
                         text = self.cache[2]
+                        self.loadindex(base, rev + 1)
                     else:
+                        self.loadindex(base, rev + 1)
                         text = self.chunk(base, df=df)
                     bins = []
                     for r in xrange(base + 1, rev + 1):
                         bins.append(self.chunk(r, df=df))
                     text = self.patches(text, bins)
                     p1, p2 = self.parents(node)
                     if node != hash(text, p1, p2):
                         raise RevlogError(_("integrity check failed on %s:%d")
                                       % (self.datafile, rev))
                     self.cache = (node, rev, text)
                     return text
                 def checkinlinesize(self, tr, fp=None):
                     if not self.inlinedata():
                         return
                     if not fp:
                         fp = self.opener(self.indexfile, 'r')
                     size = fp.tell()
                     if size < 131072:
                         return
                     tr.add(self.datafile, 0)
                     df = self.opener(self.datafile, 'w')
                     calc = struct.calcsize(self.indexformat)
                     for r in xrange(self.count()):
                         start = self.start(r) + (r + 1) * calc
                         length = self.length(r)
                         fp.seek(start)
                         d = fp.read(length)
                         df.write(d)
                     fp.close()
                     df.close()
                     fp = self.opener(self.indexfile, 'w', atomictemp=True)
                     self.version &= ~(REVLOGNGINLINEDATA)
                     if self.count():
                         x = self.index[0]
                         e = struct.pack(self.indexformat, *x)[4:]
                         l = struct.pack(versionformat, self.version)
                         fp.write(l)
                         fp.write(e)
                     for i in xrange(1, self.count()):
                         x = self.index[i]
                         e = struct.pack(self.indexformat, *x)
                         fp.write(e)
                     # if we don't call rename, the temp file will never replace the
                     # real index
                     fp.rename()
                     self.chunkcache = None
                 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     d - an optional precomputed delta
                     """
                     if text is None: text = ""
                     if p1 is None: p1 = self.tip()
                     if p2 is None: p2 = nullid
                     node = hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     n = self.count()
                     t = n - 1
                     if n:
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                         if not d:
                             prev = self.revision(self.tip())
                             d = self.diff(prev, str(text))
                         data = compress(d)
                         l = len(data[1]) + len(data[0])
                         dist = end - start + l
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if not n or dist > len(text) * 2:
                         data = compress(text)
                         l = len(data[1]) + len(data[0])
                         base = n
                     else:
                         base = self.base(t)
                     offset = 0
                     if t >= 0:
                         offset = self.end(t)
                     if self.version == 0:
                         e = (offset, l, base, link, p1, p2, node)
                     else:
                         e = (self.offset_type(offset, 0), l, len(text),
                              base, link, self.rev(p1), self.rev(p2), node)
                     self.index.append(e)
                     self.nodemap[node] = n
                     entry = struct.pack(self.indexformat, *e)
                     if not self.inlinedata():
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, n * len(entry))
                         f = self.opener(self.datafile, "a")
                         if data[0]:
                             f.write(data[0])
                         f.write(data[1])
                         f = self.opener(self.indexfile, "a")
                     else:
                         f = self.opener(self.indexfile, "a+")
                         f.seek(0, 2)
                         transaction.add(self.indexfile, f.tell())
                     if len(self.index) == 1 and self.version != 0:
                         l = struct.pack(versionformat, self.version)
                         f.write(l)
                         entry = entry[4:]
                     f.write(entry)
                     if self.inlinedata():
                         f.write(data[0])
                         f.write(data[1])
                         self.checkinlinesize(transaction, f)
                     self.cache = (node, n, text)
                     return node
                 def ancestor(self, a, b):
                     """calculate the least common ancestor of nodes a and b"""
                     # calculate the distance of every node from root
                     dist = {nullid: 0}
                     for i in xrange(self.count()):
                         n = self.node(i)
                         p1, p2 = self.parents(n)
                         dist[n] = max(dist[p1], dist[p2]) + 1
                     # traverse ancestors in order of decreasing distance from root
                     def ancestors(node):
                         # we store negative distances because heap returns smallest member
                         h = [(-dist[node], node)]
                         seen = {}
                         while h:
                             d, n = heapq.heappop(h)
                             if n not in seen:
                                 seen[n] = 1
                                 yield (-d, n)
                                 for p in self.parents(n):
                                     heapq.heappush(h, (-dist[p], p))
                     def generations(node):
                         sg, s = None, {}
                         for g,n in ancestors(node):
                             if g != sg:
                                 if sg:
                                     yield sg, s
                                 sg, s = g, {n:1}
                             else:
                                 s[n] = 1
                         yield sg, s
                     x = generations(a)
                     y = generations(b)
                     gx = x.next()
                     gy = y.next()
                     # increment each ancestor list until it is closer to root than
                     # the other, or they match
                     while 1:
                         #print "ancestor gen %s %s" % (gx[0], gy[0])
                         if gx[0] == gy[0]:
                             # find the intersection
                             i = [ n for n in gx[1] if n in gy[1] ]
                             if i:
                                 return i[0]
                             else:
                                 #print "next"
                                 gy = y.next()
                                 gx = x.next()
                         elif gx[0] < gy[0]:
                             #print "next y"
                             gy = y.next()
                         else:
                             #print "next x"
                             gx = x.next()
                 def group(self, nodelist, lookup, infocollect=None):
                     """calculate a delta group
                     Given a list of changeset revs, return a set of deltas and
                     metadata corresponding to nodes. the first delta is
                     parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
                     have this parent as it has all history before these
                     changesets. parent is parent[0]
                     """
                     revs = [self.rev(n) for n in nodelist]
                     # if we don't have any revisions touched by these changesets, bail
                     if not revs:
                         yield changegroup.closechunk()
                         return
                     # add the parent of the first rev
                     p = self.parents(self.node(revs[0]))[0]
                     revs.insert(0, self.rev(p))
                     # build deltas
                     for d in xrange(0, len(revs) - 1):
                         a, b = revs[d], revs[d + 1]
                         nb = self.node(b)
                         if infocollect is not None:
                             infocollect(nb)
                         d = self.revdiff(a, b)
                         p = self.parents(nb)
                         meta = nb + p[0] + p[1] + lookup(nb)
                         yield changegroup.genchunk("%s%s" % (meta, d))
                     yield changegroup.closechunk()
                 def addgroup(self, revs, linkmapper, transaction, unique=0):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     """
                     #track the base of the current delta log
                     r = self.count()
                     t = r - 1
                     node = None
                     base = prev = -1
                     start = end = textlen = 0
                     if r:
                         end = self.end(t)
                     ifh = self.opener(self.indexfile, "a+")
                     ifh.seek(0, 2)
                     transaction.add(self.indexfile, ifh.tell())
                     if self.inlinedata():
                         dfh = None
                     else:
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a")
                     # loop through our set of deltas
                     chain = None
                     for chunk in revs:
                         node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                         link = linkmapper(cs)
                         if node in self.nodemap:
                             # this can happen if two branches make the same change
                             # if unique:
                             #    raise RevlogError(_("already have %s") % hex(node[:4]))
                             chain = node
                             continue
                         delta = chunk[80:]
                         for p in (p1, p2):
                             if not p in self.nodemap:
                                 raise RevlogError(_("unknown parent %s") % short(p1))
                         if not chain:
                             # retrieve the parent revision of the delta chain
                             chain = p1
                             if not chain in self.nodemap:
                                 raise RevlogError(_("unknown base %s") % short(chain[:4]))
                         # full versions are inserted when the needed deltas become
                         # comparable to the uncompressed text or when the previous
                         # version is not the one we have a delta against. We use
                         # the size of the previous full rev as a proxy for the
                         # current size.
                         if chain == prev:
                             tempd = compress(delta)
                             cdelta = tempd[0] + tempd[1]
                             textlen = mdiff.patchedsize(textlen, delta)
                         if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                             # flush our writes here so we can read it in revision
                             if dfh:
                                 dfh.flush()
                             ifh.flush()
                             text = self.revision(chain)
                             text = self.patches(text, [delta])
                             chk = self.addrevision(text, transaction, link, p1, p2)
                             if chk != node:
                                 raise RevlogError(_("consistency error adding group"))
                             textlen = len(text)
                         else:
                             if self.version == 0:
                                 e = (end, len(cdelta), base, link, p1, p2, node)
                             else:
                                 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                                      link, self.rev(p1), self.rev(p2), node)
                             self.index.append(e)
                             self.nodemap[node] = r
                             if self.inlinedata():
                                 ifh.write(struct.pack(self.indexformat, *e))
                                 ifh.write(cdelta)
                                 self.checkinlinesize(transaction, ifh)
                                 if not self.inlinedata():
                                     dfh = self.opener(self.datafile, "a")
                                     ifh = self.opener(self.indexfile, "a")
                             else:
                                 if not dfh:
                                     # addrevision switched from inline to conventional
                                     # reopen the index
                                     dfh = self.opener(self.datafile, "a")
                                     ifh = self.opener(self.indexfile, "a")
                                 dfh.write(cdelta)
                                 ifh.write(struct.pack(self.indexformat, *e))
                         t, r, chain, prev = r, r + 1, node, node
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                     if node is None:
                         raise RevlogError(_("group to be added is empty"))
                     return node
                 def strip(self, rev, minlink):
                     if self.count() == 0 or rev >= self.count():
                         return
                     if isinstance(self.index, lazyindex):
                         self.loadindexmap()
                     # When stripping away a revision, we need to make sure it
                     # does not actually belong to an older changeset.
                     # The minlink parameter defines the oldest revision
                     # we're allowed to strip away.
                     while minlink > self.index[rev][-4]:
                         rev += 1
                         if rev >= self.count():
                             return
                     # first truncate the files on disk
                     end = self.start(rev)
                     if not self.inlinedata():
                         df = self.opener(self.datafile, "a")
                         df.truncate(end)
                         end = rev * struct.calcsize(self.indexformat)
                     else:
                         end += rev * struct.calcsize(self.indexformat)
                     indexf = self.opener(self.indexfile, "a")
                     indexf.truncate(end)
                     # then reset internal state in memory to forget those revisions
                     self.cache = None
                     self.chunkcache = None
                     for x in xrange(rev, self.count()):
                         del self.nodemap[self.node(x)]
                     del self.index[rev:]
                 def checksize(self):
                     expected = 0
                     if self.count():
                         expected = self.end(self.count() - 1)
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         dd = actual - expected
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         s = struct.calcsize(self.indexformat)
                         i = actual / s
                         di = actual - (i * s)
                         if self.inlinedata():
                             databytes = 0
                             for r in xrange(self.count()):
                                 databytes += self.length(r)
                             dd = 0
                             di = actual - self.count() * s - databytes
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)