upstream/mercurial-mirror Commit - r2080:1cbb14c0

1

"""

1

"""

2

revlog.py - storage back-end for mercurial

2

revlog.py - storage back-end for mercurial

3

4

This provides efficient delta storage with O(1) retrieve and append

4

This provides efficient delta storage with O(1) retrieve and append

5

and O(changes) merge between branches

5

and O(changes) merge between branches

6

7

8

9

This software may be used and distributed according to the terms

9

This software may be used and distributed according to the terms

10

of the GNU General Public License, incorporated herein by reference.

10

of the GNU General Public License, incorporated herein by reference.

11

"""

11

"""

12

13

from node import *

13

from node import *

14

from i18n import gettext as _

14

from i18n import gettext as _

15

from demandload import demandload

15

from demandload import demandload

16

demandload(globals(), "binascii changegroup errno heapq mdiff os")

16

demandload(globals(), "binascii changegroup errno heapq mdiff os")

17

demandload(globals(), "sha struct zlib")

17

demandload(globals(), "sha struct zlib")

18

19

# revlog version strings

19

# revlog version strings

20

REVLOGV0 = 0

20

REVLOGV0 = 0

21

REVLOGNG = 1

21

REVLOGNG = 1

22

23

# revlog flags

23

# revlog flags

24

REVLOGNGINLINEDATA = (1 << 16)

24

REVLOGNGINLINEDATA = (1 << 16)

25

26

def flagstr(flag):

26

def flagstr(flag):

27

if flag == "inline":

27

if flag == "inline":

28

return REVLOGNGINLINEDATA

28

return REVLOGNGINLINEDATA

29

raise RevlogError(_("unknown revlog flag %s" % flag))

29

raise RevlogError(_("unknown revlog flag %s" % flag))

30

31

def hash(text, p1, p2):

31

def hash(text, p1, p2):

32

"""generate a hash from the given text and its parent hashes

32

"""generate a hash from the given text and its parent hashes

33

34

This hash combines both the current file contents and its history

34

This hash combines both the current file contents and its history

35

in a manner that makes it easy to distinguish nodes with the same

35

in a manner that makes it easy to distinguish nodes with the same

36

content in the revision graph.

36

content in the revision graph.

37

"""

37

"""

38

l = [p1, p2]

38

l = [p1, p2]

39

l.sort()

39

l.sort()

40

s = sha.new(l[0])

40

s = sha.new(l[0])

41

s.update(l[1])

41

s.update(l[1])

42

s.update(text)

42

s.update(text)

43

return s.digest()

43

return s.digest()

44

45

def compress(text):

45

def compress(text):

46

""" generate a possibly-compressed representation of text """

46

""" generate a possibly-compressed representation of text """

47

if not text: return ("", text)

47

if not text: return ("", text)

48

if len(text) < 44:

48

if len(text) < 44:

49

if text[0] == '\0': return ("", text)

49

if text[0] == '\0': return ("", text)

50

return ('u', text)

50

return ('u', text)

51

bin = zlib.compress(text)

51

bin = zlib.compress(text)

52

if len(bin) > len(text):

52

if len(bin) > len(text):

53

if text[0] == '\0': return ("", text)

53

if text[0] == '\0': return ("", text)

54

return ('u', text)

54

return ('u', text)

55

return ("", bin)

55

return ("", bin)

56

57

def decompress(bin):

57

def decompress(bin):

58

""" decompress the given input """

58

""" decompress the given input """

59

if not bin: return bin

59

if not bin: return bin

60

t = bin[0]

60

t = bin[0]

61

if t == '\0': return bin

61

if t == '\0': return bin

62

if t == 'x': return zlib.decompress(bin)

62

if t == 'x': return zlib.decompress(bin)

63

if t == 'u': return bin[1:]

63

if t == 'u': return bin[1:]

64

raise RevlogError(_("unknown compression type %r") % t)

64

raise RevlogError(_("unknown compression type %r") % t)

65

66

indexformatv0 = ">4l20s20s20s"

66

indexformatv0 = ">4l20s20s20s"

67

v0shaoffset = 56

67

v0shaoffset = 56

68

# index ng:

68

# index ng:

69

# 6 bytes offset

69

# 6 bytes offset

70

# 2 bytes flags

70

# 2 bytes flags

71

# 4 bytes compressed length

71

# 4 bytes compressed length

72

# 4 bytes uncompressed length

72

# 4 bytes uncompressed length

73

# 4 bytes: base rev

73

# 4 bytes: base rev

74

# 4 bytes link rev

74

# 4 bytes link rev

75

# 4 bytes parent 1 rev

75

# 4 bytes parent 1 rev

76

# 4 bytes parent 2 rev

76

# 4 bytes parent 2 rev

77

# 32 bytes: nodeid

77

# 32 bytes: nodeid

78

indexformatng = ">Qiiiiii20s12x"

78

indexformatng = ">Qiiiiii20s12x"

79

ngshaoffset = 32

79

ngshaoffset = 32

80

versionformat = ">i"

80

versionformat = ">i"

81

82

class lazyparser(object):

82

class lazyparser(object):

83

"""

83

"""

84

this class avoids the need to parse the entirety of large indices

84

this class avoids the need to parse the entirety of large indices

85

"""

85

"""

86

def __init__(self, dataf, size, indexformat, shaoffset):

86

def __init__(self, dataf, size, indexformat, shaoffset):

87

self.dataf = dataf

87

self.dataf = dataf

88

self.format = indexformat

88

self.format = indexformat

89

self.s = struct.calcsize(indexformat)

89

self.s = struct.calcsize(indexformat)

90

self.indexformat = indexformat

90

self.indexformat = indexformat

91

self.datasize = size

91

self.datasize = size

92

self.l = size/self.s

92

self.l = size/self.s

93

self.index = [None] * self.l

93

self.index = [None] * self.l

94

self.map = {nullid: -1}

94

self.map = {nullid: -1}

95

self.allmap = 0

95

self.allmap = 0

96

self.all = 0

96

self.all = 0

97

self.mapfind_count = 0

97

self.mapfind_count = 0

98

self.shaoffset = shaoffset

98

self.shaoffset = shaoffset

99

100

def loadmap(self):

100

def loadmap(self):

101

"""

101

"""

102

during a commit, we need to make sure the rev being added is

102

during a commit, we need to make sure the rev being added is

103

not a duplicate. This requires loading the entire index,

103

not a duplicate. This requires loading the entire index,

104

which is fairly slow. loadmap can load up just the node map,

104

which is fairly slow. loadmap can load up just the node map,

105

which takes much less time.

105

which takes much less time.

106

"""

106

"""

107

if self.allmap: return

107

if self.allmap: return

108

start = 0

108

start = 0

109

end = self.datasize

109

end = self.datasize

110

self.allmap = 1

110

self.allmap = 1

111

cur = 0

111

cur = 0

112

count = 0

112

count = 0

113

blocksize = self.s * 256

113

blocksize = self.s * 256

114

self.dataf.seek(0)

114

self.dataf.seek(0)

115

while cur < end:

115

while cur < end:

116

data = self.dataf.read(blocksize)

116

data = self.dataf.read(blocksize)

117

off = 0

117

off = 0

118

for x in xrange(256):

118

for x in xrange(256):

119

n = data[off + self.shaoffset:off + self.shaoffset + 20]

119

n = data[off + self.shaoffset:off + self.shaoffset + 20]

120

self.map[n] = count

120

self.map[n] = count

121

count += 1

121

count += 1

122

if count >= self.l:

122

if count >= self.l:

123

break

123

break

124

off += self.s

124

off += self.s

125

cur += blocksize

125

cur += blocksize

126

127

def loadblock(self, blockstart, blocksize, data=None):

127

def loadblock(self, blockstart, blocksize, data=None):

128

if self.all: return

128

if self.all: return

129

if data is None:

129

if data is None:

130

self.dataf.seek(blockstart)

130

self.dataf.seek(blockstart)

131

data = self.dataf.read(blocksize)

131

data = self.dataf.read(blocksize)

132

lend = len(data) / self.s

132

lend = len(data) / self.s

133

i = blockstart / self.s

133

i = blockstart / self.s

134

off = 0

134

off = 0

135

for x in xrange(lend):

135

for x in xrange(lend):

136

if self.index[i + x] == None:

136

if self.index[i + x] == None:

137

b = data[off : off + self.s]

137

b = data[off : off + self.s]

138

e = struct.unpack(self.format, b)

138

self.index[i + x] = b

139

self.index[i + x] = e

139

n = b[self.shaoffset:self.shaoffset + 20]

140

self.map[e[-1]] = i + x

140

self.map[n] = i + x

141

off += self.s

141

off += self.s

142

143

def findnode(self, node):

143

def findnode(self, node):

144

"""search backwards through the index file for a specific node"""

144

"""search backwards through the index file for a specific node"""

145

if self.allmap: return None

145

if self.allmap: return None

146

147

# hg log will cause many many searches for the manifest

147

# hg log will cause many many searches for the manifest

148

# nodes. After we get called a few times, just load the whole

148

# nodes. After we get called a few times, just load the whole

149

# thing.

149

# thing.

150

if self.mapfind_count > 8:

150

if self.mapfind_count > 8:

151

self.loadmap()

151

self.loadmap()

152

if node in self.map:

152

if node in self.map:

153

return node

153

return node

154

return None

154

return None

155

self.mapfind_count += 1

155

self.mapfind_count += 1

156

last = self.l - 1

156

last = self.l - 1

157

while self.index[last] != None:

157

while self.index[last] != None:

158

if last == 0:

158

if last == 0:

159

self.all = 1

159

self.all = 1

160

self.allmap = 1

160

self.allmap = 1

161

return None

161

return None

162

last -= 1

162

last -= 1

163

end = (last + 1) * self.s

163

end = (last + 1) * self.s

164

blocksize = self.s * 256

164

blocksize = self.s * 256

165

while end >= 0:

165

while end >= 0:

166

start = max(end - blocksize, 0)

166

start = max(end - blocksize, 0)

167

self.dataf.seek(start)

167

self.dataf.seek(start)

168

data = self.dataf.read(end - start)

168

data = self.dataf.read(end - start)

169

findend = end - start

169

findend = end - start

170

while True:

170

while True:

171

# we're searching backwards, so weh have to make sure

171

# we're searching backwards, so weh have to make sure

172

# we don't find a changeset where this node is a parent

172

# we don't find a changeset where this node is a parent

173

off = data.rfind(node, 0, findend)

173

off = data.rfind(node, 0, findend)

174

findend = off

174

findend = off

175

if off >= 0:

175

if off >= 0:

176

i = off / self.s

176

i = off / self.s

177

off = i * self.s

177

off = i * self.s

178

n = data[off + self.shaoffset:off + self.shaoffset + 20]

178

n = data[off + self.shaoffset:off + self.shaoffset + 20]

179

if n == node:

179

if n == node:

180

self.map[n] = i + start / self.s

180

self.map[n] = i + start / self.s

181

return node

181

return node

182

else:

182

else:

183

break

183

break

184

end -= blocksize

184

end -= blocksize

185

return None

185

return None

186

187

def loadindex(self, i=None, end=None):

187

def loadindex(self, i=None, end=None):

188

if self.all: return

188

if self.all: return

189

all = False

189

all = False

190

if i == None:

190

if i == None:

191

blockstart = 0

191

blockstart = 0

192

blocksize = (512 / self.s) * self.s

192

blocksize = (512 / self.s) * self.s

193

end = self.datasize

193

end = self.datasize

194

all = True

194

all = True

195

else:

195

else:

196

if end:

196

if end:

197

blockstart = i * self.s

197

blockstart = i * self.s

198

end = end * self.s

198

end = end * self.s

199

blocksize = end - blockstart

199

blocksize = end - blockstart

200

else:

200

else:

201

blockstart = (i & ~(32)) * self.s

201

blockstart = (i & ~(32)) * self.s

202

blocksize = self.s * 64

202

blocksize = self.s * 64

203

end = blockstart + blocksize

203

end = blockstart + blocksize

204

while blockstart < end:

204

while blockstart < end:

205

self.loadblock(blockstart, blocksize)

205

self.loadblock(blockstart, blocksize)

206

blockstart += blocksize

206

blockstart += blocksize

207

if all: self.all = True

207

if all: self.all = True

208

209

class lazyindex(object):

209

class lazyindex(object):

210

"""a lazy version of the index array"""

210

"""a lazy version of the index array"""

211

def __init__(self, parser):

211

def __init__(self, parser):

212

self.p = parser

212

self.p = parser

213

def __len__(self):

213

def __len__(self):

214

return len(self.p.index)

214

return len(self.p.index)

215

def load(self, pos):

215

def load(self, pos):

216

if pos < 0:

216

if pos < 0:

217

pos += len(self.p.index)

217

pos += len(self.p.index)

218

self.p.loadindex(pos)

218

self.p.loadindex(pos)

219

return self.p.index[pos]

219

return self.p.index[pos]

220

def __getitem__(self, pos):

220

def __getitem__(self, pos):

221

ret~~urn~~ self.p.index[pos] or self.load(pos)

221

ret = self.p.index[pos] or self.load(pos)

222

if isinstance(ret, str):

223

ret = struct.unpack(self.p.indexformat, ret)

224

return ret

222

def __setitem__(self, pos, item):

225

def __setitem__(self, pos, item):

223

self.p.index[pos] = item

226

self.p.index[pos] = item

224

def __delitem__(self, pos):

227

def __delitem__(self, pos):

225

del self.p.index[pos]

228

del self.p.index[pos]

226

def append(self, e):

229

def append(self, e):

227

self.p.index.append(e)

230

self.p.index.append(e)

228

231

229

class lazymap(object):

232

class lazymap(object):

230

"""a lazy version of the node map"""

233

"""a lazy version of the node map"""

231

def __init__(self, parser):

234

def __init__(self, parser):

232

self.p = parser

235

self.p = parser

233

def load(self, key):

236

def load(self, key):

234

n = self.p.findnode(key)

237

n = self.p.findnode(key)

235

if n == None:

238

if n == None:

236

raise KeyError(key)

239

raise KeyError(key)

237

def __contains__(self, key):

240

def __contains__(self, key):

238

if key in self.p.map:

241

if key in self.p.map:

239

return True

242

return True

240

self.p.loadmap()

243

self.p.loadmap()

241

return key in self.p.map

244

return key in self.p.map

242

def __iter__(self):

245

def __iter__(self):

243

yield nullid

246

yield nullid

244

for i in xrange(self.p.l):

247

for i in xrange(self.p.l):

245

try:

248

ret = self.p.index[i]

246

yield self.p.index[i][-1]

249

if not ret:

247

except:

248

self.p.loadindex(i)

250

self.p.loadindex(i)

249

~~yield~~ self.p.index[i][-1]

251

ret = self.p.index[i]

252

if isinstance(ret, str):

253

ret = struct.unpack(self.p.indexformat, ret)

254

yield ret[-1]

250

def __getitem__(self, key):

255

def __getitem__(self, key):

251

try:

256

try:

252

return self.p.map[key]

257

return self.p.map[key]

253

except KeyError:

258

except KeyError:

254

try:

259

try:

255

self.load(key)

260

self.load(key)

256

return self.p.map[key]

261

return self.p.map[key]

257

except KeyError:

262

except KeyError:

258

raise KeyError("node " + hex(key))

263

raise KeyError("node " + hex(key))

259

def __setitem__(self, key, val):

264

def __setitem__(self, key, val):

260

self.p.map[key] = val

265

self.p.map[key] = val

261

def __delitem__(self, key):

266

def __delitem__(self, key):

262

del self.p.map[key]

267

del self.p.map[key]

263

268

264

class RevlogError(Exception): pass

269

class RevlogError(Exception): pass

265

270

266

class revlog(object):

271

class revlog(object):

267

"""

272

"""

268

the underlying revision storage object

273

the underlying revision storage object

269

274

270

A revlog consists of two parts, an index and the revision data.

275

A revlog consists of two parts, an index and the revision data.

271

276

272

The index is a file with a fixed record size containing

277

The index is a file with a fixed record size containing

273

information on each revision, includings its nodeid (hash), the

278

information on each revision, includings its nodeid (hash), the

274

nodeids of its parents, the position and offset of its data within

279

nodeids of its parents, the position and offset of its data within

275

the data file, and the revision it's based on. Finally, each entry

280

the data file, and the revision it's based on. Finally, each entry

276

contains a linkrev entry that can serve as a pointer to external

281

contains a linkrev entry that can serve as a pointer to external

277

data.

282

data.

278

283

279

The revision data itself is a linear collection of data chunks.

284

The revision data itself is a linear collection of data chunks.

280

Each chunk represents a revision and is usually represented as a

285

Each chunk represents a revision and is usually represented as a

281

delta against the previous chunk. To bound lookup time, runs of

286

delta against the previous chunk. To bound lookup time, runs of

282

deltas are limited to about 2 times the length of the original

287

deltas are limited to about 2 times the length of the original

283

version data. This makes retrieval of a version proportional to

288

version data. This makes retrieval of a version proportional to

284

its size, or O(1) relative to the number of revisions.

289

its size, or O(1) relative to the number of revisions.

285

290

286

Both pieces of the revlog are written to in an append-only

291

Both pieces of the revlog are written to in an append-only

287

fashion, which means we never need to rewrite a file to insert or

292

fashion, which means we never need to rewrite a file to insert or

288

remove data, and can use some simple techniques to avoid the need

293

remove data, and can use some simple techniques to avoid the need

289

for locking while reading.

294

for locking while reading.

290

"""

295

"""

291

def __init__(self, opener, indexfile, datafile, defversion=0):

296

def __init__(self, opener, indexfile, datafile, defversion=0):

292

"""

297

"""

293

create a revlog object

298

create a revlog object

294

299

295

opener is a function that abstracts the file opening operation

300

opener is a function that abstracts the file opening operation

296

and can be used to implement COW semantics or the like.

301

and can be used to implement COW semantics or the like.

297

"""

302

"""

298

self.indexfile = indexfile

303

self.indexfile = indexfile

299

self.datafile = datafile

304

self.datafile = datafile

300

self.opener = opener

305

self.opener = opener

301

306

302

self.indexstat = None

307

self.indexstat = None

303

self.cache = None

308

self.cache = None

304

self.chunkcache = None

309

self.chunkcache = None

305

self.defversion = defversion

310

self.defversion = defversion

306

self.load()

311

self.load()

307

312

308

def load(self):

313

def load(self):

309

v = self.defversion

314

v = self.defversion

310

try:

315

try:

311

f = self.opener(self.indexfile)

316

f = self.opener(self.indexfile)

312

i = f.read(4)

317

i = f.read(4)

313

f.seek(0)

318

f.seek(0)

314

except IOError, inst:

319

except IOError, inst:

315

if inst.errno != errno.ENOENT:

320

if inst.errno != errno.ENOENT:

316

raise

321

raise

317

i = ""

322

i = ""

318

else:

323

else:

319

try:

324

try:

320

st = os.fstat(f.fileno())

325

st = os.fstat(f.fileno())

321

except AttributeError, inst:

326

except AttributeError, inst:

322

st = None

327

st = None

323

else:

328

else:

324

oldst = self.indexstat

329

oldst = self.indexstat

325

if (oldst and st.st_dev == oldst.st_dev

330

if (oldst and st.st_dev == oldst.st_dev

326

and st.st_ino == oldst.st_ino

331

and st.st_ino == oldst.st_ino

327

and st.st_mtime == oldst.st_mtime

332

and st.st_mtime == oldst.st_mtime

328

and st.st_ctime == oldst.st_ctime):

333

and st.st_ctime == oldst.st_ctime):

329

return

334

return

330

self.indexstat = st

335

self.indexstat = st

331

if len(i) > 0:

336

if len(i) > 0:

332

v = struct.unpack(versionformat, i)[0]

337

v = struct.unpack(versionformat, i)[0]

333

flags = v & ~0xFFFF

338

flags = v & ~0xFFFF

334

fmt = v & 0xFFFF

339

fmt = v & 0xFFFF

335

if fmt == 0:

340

if fmt == 0:

336

if flags:

341

if flags:

337

raise RevlogError(_("index %s invalid flags %x for format v0" %

342

raise RevlogError(_("index %s invalid flags %x for format v0" %

338

(self.indexfile, flags)))

343

(self.indexfile, flags)))

339

elif fmt == REVLOGNG:

344

elif fmt == REVLOGNG:

340

if flags & ~REVLOGNGINLINEDATA:

345

if flags & ~REVLOGNGINLINEDATA:

341

raise RevlogError(_("index %s invalid flags %x for revlogng" %

346

raise RevlogError(_("index %s invalid flags %x for revlogng" %

342

(self.indexfile, flags)))

347

(self.indexfile, flags)))

343

else:

348

else:

344

raise RevlogError(_("index %s invalid format %d" %

349

raise RevlogError(_("index %s invalid format %d" %

345

(self.indexfile, fmt)))

350

(self.indexfile, fmt)))

346

self.version = v

351

self.version = v

347

if v == 0:

352

if v == 0:

348

self.indexformat = indexformatv0

353

self.indexformat = indexformatv0

349

shaoffset = v0shaoffset

354

shaoffset = v0shaoffset

350

else:

355

else:

351

self.indexformat = indexformatng

356

self.indexformat = indexformatng

352

shaoffset = ngshaoffset

357

shaoffset = ngshaoffset

353

358

354

if i:

359

if i:

355

if not self.inlinedata() and st and st.st_size > 10000:

360

if not self.inlinedata() and st and st.st_size > 10000:

356

# big index, let's parse it on demand

361

# big index, let's parse it on demand

357

parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)

362

parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)

358

self.index = lazyindex(parser)

363

self.index = lazyindex(parser)

359

self.nodemap = lazymap(parser)

364

self.nodemap = lazymap(parser)

360

else:

365

else:

361

i = f.read()

366

i = f.read()

362

self.parseindex(i)

367

self.parseindex(i)

363

if self.inlinedata():

368

if self.inlinedata():

364

# we've already got the entire data file read in, save it

369

# we've already got the entire data file read in, save it

365

# in the chunk data

370

# in the chunk data

366

self.chunkcache = (0, i)

371

self.chunkcache = (0, i)

367

if self.version != 0:

372

if self.version != 0:

368

e = list(self.index[0])

373

e = list(self.index[0])

369

type = self.ngtype(e[0])

374

type = self.ngtype(e[0])

370

e[0] = self.offset_type(0, type)

375

e[0] = self.offset_type(0, type)

371

self.index[0] = e

376

self.index[0] = e

372

else:

377

else:

373

self.nodemap = { nullid: -1}

378

self.nodemap = { nullid: -1}

374

self.index = []

379

self.index = []

375

380

376

381

377

def parseindex(self, data):

382

def parseindex(self, data):

378

s = struct.calcsize(self.indexformat)

383

s = struct.calcsize(self.indexformat)

379

l = len(data)

384

l = len(data)

380

self.index = []

385

self.index = []

381

self.nodemap = {nullid: -1}

386

self.nodemap = {nullid: -1}

382

inline = self.inlinedata()

387

inline = self.inlinedata()

383

off = 0

388

off = 0

384

n = 0

389

n = 0

385

while off < l:

390

while off < l:

386

e = struct.unpack(self.indexformat, data[off:off + s])

391

e = struct.unpack(self.indexformat, data[off:off + s])

387

self.index.append(e)

392

self.index.append(e)

388

self.nodemap[e[-1]] = n

393

self.nodemap[e[-1]] = n

389

n += 1

394

n += 1

390

off += s

395

off += s

391

if inline:

396

if inline:

392

off += e[1]

397

off += e[1]

393

398

394

def ngoffset(self, q):

399

def ngoffset(self, q):

395

if q & 0xFFFF:

400

if q & 0xFFFF:

396

raise RevlogError(_('%s: incompatible revision flag %x') %

401

raise RevlogError(_('%s: incompatible revision flag %x') %

397

(self.indexfile, type))

402

(self.indexfile, type))

398

return long(q >> 16)

403

return long(q >> 16)

399

404

400

def ngtype(self, q):

405

def ngtype(self, q):

401

return int(q & 0xFFFF)

406

return int(q & 0xFFFF)

402

407

403

def offset_type(self, offset, type):

408

def offset_type(self, offset, type):

404

return long(long(offset) << 16 | type)

409

return long(long(offset) << 16 | type)

405

410

406

def loadindex(self, start, end):

411

def loadindex(self, start, end):

407

"""load a block of indexes all at once from the lazy parser"""

412

"""load a block of indexes all at once from the lazy parser"""

408

if isinstance(self.index, lazyindex):

413

if isinstance(self.index, lazyindex):

409

self.index.p.loadindex(start, end)

414

self.index.p.loadindex(start, end)

410

415

411

def loadindexmap(self):

416

def loadindexmap(self):

412

"""loads both the map and the index from the lazy parser"""

417

"""loads both the map and the index from the lazy parser"""

413

if isinstance(self.index, lazyindex):

418

if isinstance(self.index, lazyindex):

414

p = self.index.p

419

p = self.index.p

415

p.loadindex()

420

p.loadindex()

416

self.nodemap = p.map

421

self.nodemap = p.map

417

422

418

def loadmap(self):

423

def loadmap(self):

419

"""loads the map from the lazy parser"""

424

"""loads the map from the lazy parser"""

420

if isinstance(self.nodemap, lazymap):

425

if isinstance(self.nodemap, lazymap):

421

self.nodemap.p.loadmap()

426

self.nodemap.p.loadmap()

422

self.nodemap = self.nodemap.p.map

427

self.nodemap = self.nodemap.p.map

423

428

424

def inlinedata(self): return self.version & REVLOGNGINLINEDATA

429

def inlinedata(self): return self.version & REVLOGNGINLINEDATA

425

def tip(self): return self.node(len(self.index) - 1)

430

def tip(self): return self.node(len(self.index) - 1)

426

def count(self): return len(self.index)

431

def count(self): return len(self.index)

427

def node(self, rev):

432

def node(self, rev):

428

return (rev < 0) and nullid or self.index[rev][-1]

433

return (rev < 0) and nullid or self.index[rev][-1]

429

def rev(self, node):

434

def rev(self, node):

430

try:

435

try:

431

return self.nodemap[node]

436

return self.nodemap[node]

432

except KeyError:

437

except KeyError:

433

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

438

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

434

def linkrev(self, node): return self.index[self.rev(node)][-4]

439

def linkrev(self, node): return self.index[self.rev(node)][-4]

435

def parents(self, node):

440

def parents(self, node):

436

if node == nullid: return (nullid, nullid)

441

if node == nullid: return (nullid, nullid)

437

r = self.rev(node)

442

r = self.rev(node)

438

d = self.index[r][-3:-1]

443

d = self.index[r][-3:-1]

439

if self.version == 0:

444

if self.version == 0:

440

return d

445

return d

441

return [ self.node(x) for x in d ]

446

return [ self.node(x) for x in d ]

442

def start(self, rev):

447

def start(self, rev):

443

if rev < 0:

448

if rev < 0:

444

return -1

449

return -1

445

if self.version != 0:

450

if self.version != 0:

446

return self.ngoffset(self.index[rev][0])

451

return self.ngoffset(self.index[rev][0])

447

return self.index[rev][0]

452

return self.index[rev][0]

448

453

449

def end(self, rev): return self.start(rev) + self.length(rev)

454

def end(self, rev): return self.start(rev) + self.length(rev)

450

455

451

def size(self, rev):

456

def size(self, rev):

452

"""return the length of the uncompressed text for a given revision"""

457

"""return the length of the uncompressed text for a given revision"""

453

l = -1

458

l = -1

454

if self.version != 0:

459

if self.version != 0:

455

l = self.index[rev][2]

460

l = self.index[rev][2]

456

if l >= 0:

461

if l >= 0:

457

return l

462

return l

458

463

459

t = self.revision(self.node(rev))

464

t = self.revision(self.node(rev))

460

return len(t)

465

return len(t)

461

466

462

# alternate implementation, The advantage to this code is it

467

# alternate implementation, The advantage to this code is it

463

# will be faster for a single revision. But, the results are not

468

# will be faster for a single revision. But, the results are not

464

# cached, so finding the size of every revision will be slower.

469

# cached, so finding the size of every revision will be slower.

465

"""

470

"""

466

if self.cache and self.cache[1] == rev:

471

if self.cache and self.cache[1] == rev:

467

return len(self.cache[2])

472

return len(self.cache[2])

468

473

469

base = self.base(rev)

474

base = self.base(rev)

470

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

475

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

471

base = self.cache[1]

476

base = self.cache[1]

472

text = self.cache[2]

477

text = self.cache[2]

473

else:

478

else:

474

text = self.revision(self.node(base))

479

text = self.revision(self.node(base))

475

480

476

l = len(text)

481

l = len(text)

477

for x in xrange(base + 1, rev + 1):

482

for x in xrange(base + 1, rev + 1):

478

l = mdiff.patchedsize(l, self.chunk(x))

483

l = mdiff.patchedsize(l, self.chunk(x))

479

return l

484

return l

480

"""

485

"""

481

486

482

def length(self, rev):

487

def length(self, rev):

483

if rev < 0:

488

if rev < 0:

484

return 0

489

return 0

485

else:

490

else:

486

return self.index[rev][1]

491

return self.index[rev][1]

487

def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

492

def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

488

493

489

def reachable(self, rev, stop=None):

494

def reachable(self, rev, stop=None):

490

reachable = {}

495

reachable = {}

491

visit = [rev]

496

visit = [rev]

492

reachable[rev] = 1

497

reachable[rev] = 1

493

if stop:

498

if stop:

494

stopn = self.rev(stop)

499

stopn = self.rev(stop)

495

else:

500

else:

496

stopn = 0

501

stopn = 0

497

while visit:

502

while visit:

498

n = visit.pop(0)

503

n = visit.pop(0)

499

if n == stop:

504

if n == stop:

500

continue

505

continue

501

if n == nullid:

506

if n == nullid:

502

continue

507

continue

503

for p in self.parents(n):

508

for p in self.parents(n):

504

if self.rev(p) < stopn:

509

if self.rev(p) < stopn:

505

continue

510

continue

506

if p not in reachable:

511

if p not in reachable:

507

reachable[p] = 1

512

reachable[p] = 1

508

visit.append(p)

513

visit.append(p)

509

return reachable

514

return reachable

510

515

511

def nodesbetween(self, roots=None, heads=None):

516

def nodesbetween(self, roots=None, heads=None):

512

"""Return a tuple containing three elements. Elements 1 and 2 contain

517

"""Return a tuple containing three elements. Elements 1 and 2 contain

513

a final list bases and heads after all the unreachable ones have been

518

a final list bases and heads after all the unreachable ones have been

514

pruned. Element 0 contains a topologically sorted list of all

519

pruned. Element 0 contains a topologically sorted list of all

515

520

516

nodes that satisfy these constraints:

521

nodes that satisfy these constraints:

517

1. All nodes must be descended from a node in roots (the nodes on

522

1. All nodes must be descended from a node in roots (the nodes on

518

roots are considered descended from themselves).

523

roots are considered descended from themselves).

519

2. All nodes must also be ancestors of a node in heads (the nodes in

524

2. All nodes must also be ancestors of a node in heads (the nodes in

520

heads are considered to be their own ancestors).

525

heads are considered to be their own ancestors).

521

526

522

If roots is unspecified, nullid is assumed as the only root.

527

If roots is unspecified, nullid is assumed as the only root.

523

If heads is unspecified, it is taken to be the output of the

528

If heads is unspecified, it is taken to be the output of the

524

heads method (i.e. a list of all nodes in the repository that

529

heads method (i.e. a list of all nodes in the repository that

525

have no children)."""

530

have no children)."""

526

nonodes = ([], [], [])

531

nonodes = ([], [], [])

527

if roots is not None:

532

if roots is not None:

528

roots = list(roots)

533

roots = list(roots)

529

if not roots:

534

if not roots:

530

return nonodes

535

return nonodes

531

lowestrev = min([self.rev(n) for n in roots])

536

lowestrev = min([self.rev(n) for n in roots])

532

else:

537

else:

533

roots = [nullid] # Everybody's a descendent of nullid

538

roots = [nullid] # Everybody's a descendent of nullid

534

lowestrev = -1

539

lowestrev = -1

535

if (lowestrev == -1) and (heads is None):

540

if (lowestrev == -1) and (heads is None):

536

# We want _all_ the nodes!

541

# We want _all_ the nodes!

537

return ([self.node(r) for r in xrange(0, self.count())],

542

return ([self.node(r) for r in xrange(0, self.count())],

538

[nullid], list(self.heads()))

543

[nullid], list(self.heads()))

539

if heads is None:

544

if heads is None:

540

# All nodes are ancestors, so the latest ancestor is the last

545

# All nodes are ancestors, so the latest ancestor is the last

541

# node.

546

# node.

542

highestrev = self.count() - 1

547

highestrev = self.count() - 1

543

# Set ancestors to None to signal that every node is an ancestor.

548

# Set ancestors to None to signal that every node is an ancestor.

544

ancestors = None

549

ancestors = None

545

# Set heads to an empty dictionary for later discovery of heads

550

# Set heads to an empty dictionary for later discovery of heads

546

heads = {}

551

heads = {}

547

else:

552

else:

548

heads = list(heads)

553

heads = list(heads)

549

if not heads:

554

if not heads:

550

return nonodes

555

return nonodes

551

ancestors = {}

556

ancestors = {}

552

# Start at the top and keep marking parents until we're done.

557

# Start at the top and keep marking parents until we're done.

553

nodestotag = heads[:]

558

nodestotag = heads[:]

554

# Turn heads into a dictionary so we can remove 'fake' heads.

559

# Turn heads into a dictionary so we can remove 'fake' heads.

555

# Also, later we will be using it to filter out the heads we can't

560

# Also, later we will be using it to filter out the heads we can't

556

# find from roots.

561

# find from roots.

557

heads = dict.fromkeys(heads, 0)

562

heads = dict.fromkeys(heads, 0)

558

# Remember where the top was so we can use it as a limit later.

563

# Remember where the top was so we can use it as a limit later.

559

highestrev = max([self.rev(n) for n in nodestotag])

564

highestrev = max([self.rev(n) for n in nodestotag])

560

while nodestotag:

565

while nodestotag:

561

# grab a node to tag

566

# grab a node to tag

562

n = nodestotag.pop()

567

n = nodestotag.pop()

563

# Never tag nullid

568

# Never tag nullid

564

if n == nullid:

569

if n == nullid:

565

continue

570

continue

566

# A node's revision number represents its place in a

571

# A node's revision number represents its place in a

567

# topologically sorted list of nodes.

572

# topologically sorted list of nodes.

568

r = self.rev(n)

573

r = self.rev(n)

569

if r >= lowestrev:

574

if r >= lowestrev:

570

if n not in ancestors:

575

if n not in ancestors:

571

# If we are possibly a descendent of one of the roots

576

# If we are possibly a descendent of one of the roots

572

# and we haven't already been marked as an ancestor

577

# and we haven't already been marked as an ancestor

573

ancestors[n] = 1 # Mark as ancestor

578

ancestors[n] = 1 # Mark as ancestor

574

# Add non-nullid parents to list of nodes to tag.

579

# Add non-nullid parents to list of nodes to tag.

575

nodestotag.extend([p for p in self.parents(n) if

580

nodestotag.extend([p for p in self.parents(n) if

576

p != nullid])

581

p != nullid])

577

elif n in heads: # We've seen it before, is it a fake head?

582

elif n in heads: # We've seen it before, is it a fake head?

578

# So it is, real heads should not be the ancestors of

583

# So it is, real heads should not be the ancestors of

579

# any other heads.

584

# any other heads.

580

heads.pop(n)

585

heads.pop(n)

581

if not ancestors:

586

if not ancestors:

582

return nonodes

587

return nonodes

583

# Now that we have our set of ancestors, we want to remove any

588

# Now that we have our set of ancestors, we want to remove any

584

# roots that are not ancestors.

589

# roots that are not ancestors.

585

590

586

# If one of the roots was nullid, everything is included anyway.

591

# If one of the roots was nullid, everything is included anyway.

587

if lowestrev > -1:

592

if lowestrev > -1:

588

# But, since we weren't, let's recompute the lowest rev to not

593

# But, since we weren't, let's recompute the lowest rev to not

589

# include roots that aren't ancestors.

594

# include roots that aren't ancestors.

590

595

591

# Filter out roots that aren't ancestors of heads

596

# Filter out roots that aren't ancestors of heads

592

roots = [n for n in roots if n in ancestors]

597

roots = [n for n in roots if n in ancestors]

593

# Recompute the lowest revision

598

# Recompute the lowest revision

594

if roots:

599

if roots:

595

lowestrev = min([self.rev(n) for n in roots])

600

lowestrev = min([self.rev(n) for n in roots])

596

else:

601

else:

597

# No more roots? Return empty list

602

# No more roots? Return empty list

598

return nonodes

603

return nonodes

599

else:

604

else:

600

# We are descending from nullid, and don't need to care about

605

# We are descending from nullid, and don't need to care about

601

# any other roots.

606

# any other roots.

602

lowestrev = -1

607

lowestrev = -1

603

roots = [nullid]

608

roots = [nullid]

604

# Transform our roots list into a 'set' (i.e. a dictionary where the

609

# Transform our roots list into a 'set' (i.e. a dictionary where the

605

# values don't matter.

610

# values don't matter.

606

descendents = dict.fromkeys(roots, 1)

611

descendents = dict.fromkeys(roots, 1)

607

# Also, keep the original roots so we can filter out roots that aren't

612

# Also, keep the original roots so we can filter out roots that aren't

608

# 'real' roots (i.e. are descended from other roots).

613

# 'real' roots (i.e. are descended from other roots).

609

roots = descendents.copy()

614

roots = descendents.copy()

610

# Our topologically sorted list of output nodes.

615

# Our topologically sorted list of output nodes.

611

orderedout = []

616

orderedout = []

612

# Don't start at nullid since we don't want nullid in our output list,

617

# Don't start at nullid since we don't want nullid in our output list,

613

# and if nullid shows up in descedents, empty parents will look like

618

# and if nullid shows up in descedents, empty parents will look like

614

# they're descendents.

619

# they're descendents.

615

for r in xrange(max(lowestrev, 0), highestrev + 1):

620

for r in xrange(max(lowestrev, 0), highestrev + 1):

616

n = self.node(r)

621

n = self.node(r)

617

isdescendent = False

622

isdescendent = False

618

if lowestrev == -1: # Everybody is a descendent of nullid

623

if lowestrev == -1: # Everybody is a descendent of nullid

619

isdescendent = True

624

isdescendent = True

620

elif n in descendents:

625

elif n in descendents:

621

# n is already a descendent

626

# n is already a descendent

622

isdescendent = True

627

isdescendent = True

623

# This check only needs to be done here because all the roots

628

# This check only needs to be done here because all the roots

624

# will start being marked is descendents before the loop.

629

# will start being marked is descendents before the loop.

625

if n in roots:

630

if n in roots:

626

# If n was a root, check if it's a 'real' root.

631

# If n was a root, check if it's a 'real' root.

627

p = tuple(self.parents(n))

632

p = tuple(self.parents(n))

628

# If any of its parents are descendents, it's not a root.

633

# If any of its parents are descendents, it's not a root.

629

if (p[0] in descendents) or (p[1] in descendents):

634

if (p[0] in descendents) or (p[1] in descendents):

630

roots.pop(n)

635

roots.pop(n)

631

else:

636

else:

632

p = tuple(self.parents(n))

637

p = tuple(self.parents(n))

633

# A node is a descendent if either of its parents are

638

# A node is a descendent if either of its parents are

634

# descendents. (We seeded the dependents list with the roots

639

# descendents. (We seeded the dependents list with the roots

635

# up there, remember?)

640

# up there, remember?)

636

if (p[0] in descendents) or (p[1] in descendents):

641

if (p[0] in descendents) or (p[1] in descendents):

637

descendents[n] = 1

642

descendents[n] = 1

638

isdescendent = True

643

isdescendent = True

639

if isdescendent and ((ancestors is None) or (n in ancestors)):

644

if isdescendent and ((ancestors is None) or (n in ancestors)):

640

# Only include nodes that are both descendents and ancestors.

645

# Only include nodes that are both descendents and ancestors.

641

orderedout.append(n)

646

orderedout.append(n)

642

if (ancestors is not None) and (n in heads):

647

if (ancestors is not None) and (n in heads):

643

# We're trying to figure out which heads are reachable

648

# We're trying to figure out which heads are reachable

644

# from roots.

649

# from roots.

645

# Mark this head as having been reached

650

# Mark this head as having been reached

646

heads[n] = 1

651

heads[n] = 1

647

elif ancestors is None:

652

elif ancestors is None:

648

# Otherwise, we're trying to discover the heads.

653

# Otherwise, we're trying to discover the heads.

649

# Assume this is a head because if it isn't, the next step

654

# Assume this is a head because if it isn't, the next step

650

# will eventually remove it.

655

# will eventually remove it.

651

heads[n] = 1

656

heads[n] = 1

652

# But, obviously its parents aren't.

657

# But, obviously its parents aren't.

653

for p in self.parents(n):

658

for p in self.parents(n):

654

heads.pop(p, None)

659

heads.pop(p, None)

655

heads = [n for n in heads.iterkeys() if heads[n] != 0]

660

heads = [n for n in heads.iterkeys() if heads[n] != 0]

656

roots = roots.keys()

661

roots = roots.keys()

657

assert orderedout

662

assert orderedout

658

assert roots

663

assert roots

659

assert heads

664

assert heads

660

return (orderedout, roots, heads)

665

return (orderedout, roots, heads)

661

666

662

def heads(self, start=None):

667

def heads(self, start=None):

663

"""return the list of all nodes that have no children

668

"""return the list of all nodes that have no children

664

669

665

if start is specified, only heads that are descendants of

670

if start is specified, only heads that are descendants of

666

start will be returned

671

start will be returned

667

672

668

"""

673

"""

669

if start is None:

674

if start is None:

670

start = nullid

675

start = nullid

671

reachable = {start: 1}

676

reachable = {start: 1}

672

heads = {start: 1}

677

heads = {start: 1}

673

startrev = self.rev(start)

678

startrev = self.rev(start)

674

679

675

for r in xrange(startrev + 1, self.count()):

680

for r in xrange(startrev + 1, self.count()):

676

n = self.node(r)

681

n = self.node(r)

677

for pn in self.parents(n):

682

for pn in self.parents(n):

678

if pn in reachable:

683

if pn in reachable:

679

reachable[n] = 1

684

reachable[n] = 1

680

heads[n] = 1

685

heads[n] = 1

681

if pn in heads:

686

if pn in heads:

682

del heads[pn]

687

del heads[pn]

683

return heads.keys()

688

return heads.keys()

684

689

685

def children(self, node):

690

def children(self, node):

686

"""find the children of a given node"""

691

"""find the children of a given node"""

687

c = []

692

c = []

688

p = self.rev(node)

693

p = self.rev(node)

689

for r in range(p + 1, self.count()):

694

for r in range(p + 1, self.count()):

690

n = self.node(r)

695

n = self.node(r)

691

for pn in self.parents(n):

696

for pn in self.parents(n):

692

if pn == node:

697

if pn == node:

693

c.append(n)

698

c.append(n)

694

continue

699

continue

695

elif pn == nullid:

700

elif pn == nullid:

696

continue

701

continue

697

return c

702

return c

698

703

699

def lookup(self, id):

704

def lookup(self, id):

700

"""locate a node based on revision number or subset of hex nodeid"""

705

"""locate a node based on revision number or subset of hex nodeid"""

701

try:

706

try:

702

rev = int(id)

707

rev = int(id)

703

if str(rev) != id: raise ValueError

708

if str(rev) != id: raise ValueError

704

if rev < 0: rev = self.count() + rev

709

if rev < 0: rev = self.count() + rev

705

if rev < 0 or rev >= self.count(): raise ValueError

710

if rev < 0 or rev >= self.count(): raise ValueError

706

return self.node(rev)

711

return self.node(rev)

707

except (ValueError, OverflowError):

712

except (ValueError, OverflowError):

708

c = []

713

c = []

709

for n in self.nodemap:

714

for n in self.nodemap:

710

if hex(n).startswith(id):

715

if hex(n).startswith(id):

711

c.append(n)

716

c.append(n)

712

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

717

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

713

if len(c) < 1: raise RevlogError(_("No match found"))

718

if len(c) < 1: raise RevlogError(_("No match found"))

714

return c[0]

719

return c[0]

715

720

716

return None

721

return None

717

722

718

def diff(self, a, b):

723

def diff(self, a, b):

719

"""return a delta between two revisions"""

724

"""return a delta between two revisions"""

720

return mdiff.textdiff(a, b)

725

return mdiff.textdiff(a, b)

721

726

722

def patches(self, t, pl):

727

def patches(self, t, pl):

723

"""apply a list of patches to a string"""

728

"""apply a list of patches to a string"""

724

return mdiff.patches(t, pl)

729

return mdiff.patches(t, pl)

725

730

726

def chunk(self, rev, df=None, cachelen=4096):

731

def chunk(self, rev, df=None, cachelen=4096):

727

start, length = self.start(rev), self.length(rev)

732

start, length = self.start(rev), self.length(rev)

728

inline = self.inlinedata()

733

inline = self.inlinedata()

729

if inline:

734

if inline:

730

start += (rev + 1) * struct.calcsize(self.indexformat)

735

start += (rev + 1) * struct.calcsize(self.indexformat)

731

end = start + length

736

end = start + length

732

def loadcache(df):

737

def loadcache(df):

733

cache_length = max(cachelen, length) # 4k

738

cache_length = max(cachelen, length) # 4k

734

if not df:

739

if not df:

735

if inline:

740

if inline:

736

df = self.opener(self.indexfile)

741

df = self.opener(self.indexfile)

737

else:

742

else:

738

df = self.opener(self.datafile)

743

df = self.opener(self.datafile)

739

df.seek(start)

744

df.seek(start)

740

self.chunkcache = (start, df.read(cache_length))

745

self.chunkcache = (start, df.read(cache_length))

741

746

742

if not self.chunkcache:

747

if not self.chunkcache:

743

loadcache(df)

748

loadcache(df)

744

749

745

cache_start = self.chunkcache[0]

750

cache_start = self.chunkcache[0]

746

cache_end = cache_start + len(self.chunkcache[1])

751

cache_end = cache_start + len(self.chunkcache[1])

747

if start >= cache_start and end <= cache_end:

752

if start >= cache_start and end <= cache_end:

748

# it is cached

753

# it is cached

749

offset = start - cache_start

754

offset = start - cache_start

750

else:

755

else:

751

loadcache(df)

756

loadcache(df)

752

offset = 0

757

offset = 0

753

758

754

#def checkchunk():

759

#def checkchunk():

755

# df = self.opener(self.datafile)

760

# df = self.opener(self.datafile)

756

# df.seek(start)

761

# df.seek(start)

757

# return df.read(length)

762

# return df.read(length)

758

#assert s == checkchunk()

763

#assert s == checkchunk()

759

return decompress(self.chunkcache[1][offset:offset + length])

764

return decompress(self.chunkcache[1][offset:offset + length])

760

765

761

def delta(self, node):

766

def delta(self, node):

762

"""return or calculate a delta between a node and its predecessor"""

767

"""return or calculate a delta between a node and its predecessor"""

763

r = self.rev(node)

768

r = self.rev(node)

764

return self.revdiff(r - 1, r)

769

return self.revdiff(r - 1, r)

765

770

766

def revdiff(self, rev1, rev2):

771

def revdiff(self, rev1, rev2):

767

"""return or calculate a delta between two revisions"""

772

"""return or calculate a delta between two revisions"""

768

b1 = self.base(rev1)

773

b1 = self.base(rev1)

769

b2 = self.base(rev2)

774

b2 = self.base(rev2)

770

if b1 == b2 and rev1 + 1 == rev2:

775

if b1 == b2 and rev1 + 1 == rev2:

771

return self.chunk(rev2)

776

return self.chunk(rev2)

772

else:

777

else:

773

return self.diff(self.revision(self.node(rev1)),

778

return self.diff(self.revision(self.node(rev1)),

774

self.revision(self.node(rev2)))

779

self.revision(self.node(rev2)))

775

780

776

def revision(self, node):

781

def revision(self, node):

777

"""return an uncompressed revision of a given"""

782

"""return an uncompressed revision of a given"""

778

if node == nullid: return ""

783

if node == nullid: return ""

779

if self.cache and self.cache[0] == node: return self.cache[2]

784

if self.cache and self.cache[0] == node: return self.cache[2]

780

785

781

# look up what we need to read

786

# look up what we need to read

782

text = None

787

text = None

783

rev = self.rev(node)

788

rev = self.rev(node)

784

base = self.base(rev)

789

base = self.base(rev)

785

790

786

if self.inlinedata():

791

if self.inlinedata():

787

# we probably have the whole chunk cached

792

# we probably have the whole chunk cached

788

df = None

793

df = None

789

else:

794

else:

790

df = self.opener(self.datafile)

795

df = self.opener(self.datafile)

791

796

792

# do we have useful data cached?

797

# do we have useful data cached?

793

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

798

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

794

base = self.cache[1]

799

base = self.cache[1]

795

text = self.cache[2]

800

text = self.cache[2]

796

self.loadindex(base, rev + 1)

801

self.loadindex(base, rev + 1)

797

else:

802

else:

798

self.loadindex(base, rev + 1)

803

self.loadindex(base, rev + 1)

799

text = self.chunk(base, df=df)

804

text = self.chunk(base, df=df)

800

805

801

bins = []

806

bins = []

802

for r in xrange(base + 1, rev + 1):

807

for r in xrange(base + 1, rev + 1):

803

bins.append(self.chunk(r, df=df))

808

bins.append(self.chunk(r, df=df))

804

809

805

text = self.patches(text, bins)

810

text = self.patches(text, bins)

806

811

807

p1, p2 = self.parents(node)

812

p1, p2 = self.parents(node)

808

if node != hash(text, p1, p2):

813

if node != hash(text, p1, p2):

809

raise RevlogError(_("integrity check failed on %s:%d")

814

raise RevlogError(_("integrity check failed on %s:%d")

810

% (self.datafile, rev))

815

% (self.datafile, rev))

811

816

812

self.cache = (node, rev, text)

817

self.cache = (node, rev, text)

813

return text

818

return text

814

819

815

def checkinlinesize(self, tr, fp=None):

820

def checkinlinesize(self, tr, fp=None):

816

if not self.inlinedata():

821

if not self.inlinedata():

817

return

822

return

818

if not fp:

823

if not fp:

819

fp = self.opener(self.indexfile, 'r')

824

fp = self.opener(self.indexfile, 'r')

820

size = fp.tell()

825

size = fp.tell()

821

if size < 131072:

826

if size < 131072:

822

return

827

return

823

tr.add(self.datafile, 0)

828

tr.add(self.datafile, 0)

824

df = self.opener(self.datafile, 'w')

829

df = self.opener(self.datafile, 'w')

825

calc = struct.calcsize(self.indexformat)

830

calc = struct.calcsize(self.indexformat)

826

for r in xrange(self.count()):

831

for r in xrange(self.count()):

827

start = self.start(r) + (r + 1) * calc

832

start = self.start(r) + (r + 1) * calc

828

length = self.length(r)

833

length = self.length(r)

829

fp.seek(start)

834

fp.seek(start)

830

d = fp.read(length)

835

d = fp.read(length)

831

df.write(d)

836

df.write(d)

832

fp.close()

837

fp.close()

833

df.close()

838

df.close()

834

fp = self.opener(self.indexfile, 'w', atomictemp=True)

839

fp = self.opener(self.indexfile, 'w', atomictemp=True)

835

self.version &= ~(REVLOGNGINLINEDATA)

840

self.version &= ~(REVLOGNGINLINEDATA)

836

if self.count():

841

if self.count():

837

x = self.index[0]

842

x = self.index[0]

838

e = struct.pack(self.indexformat, *x)[4:]

843

e = struct.pack(self.indexformat, *x)[4:]

839

l = struct.pack(versionformat, self.version)

844

l = struct.pack(versionformat, self.version)

840

fp.write(l)

845

fp.write(l)

841

fp.write(e)

846

fp.write(e)

842

847

843

for i in xrange(1, self.count()):

848

for i in xrange(1, self.count()):

844

x = self.index[i]

849

x = self.index[i]

845

e = struct.pack(self.indexformat, *x)

850

e = struct.pack(self.indexformat, *x)

846

fp.write(e)

851

fp.write(e)

847

852

848

# if we don't call rename, the temp file will never replace the

853

# if we don't call rename, the temp file will never replace the

849

# real index

854

# real index

850

fp.rename()

855

fp.rename()

851

self.chunkcache = None

856

self.chunkcache = None

852

857

853

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

858

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

854

"""add a revision to the log

859

"""add a revision to the log

855

860

856

text - the revision data to add

861

text - the revision data to add

857

transaction - the transaction object used for rollback

862

transaction - the transaction object used for rollback

858

link - the linkrev data to add

863

link - the linkrev data to add

859

p1, p2 - the parent nodeids of the revision

864

p1, p2 - the parent nodeids of the revision

860

d - an optional precomputed delta

865

d - an optional precomputed delta

861

"""

866

"""

862

if text is None: text = ""

867

if text is None: text = ""

863

if p1 is None: p1 = self.tip()

868

if p1 is None: p1 = self.tip()

864

if p2 is None: p2 = nullid

869

if p2 is None: p2 = nullid

865

870

866

node = hash(text, p1, p2)

871

node = hash(text, p1, p2)

867

872

868

if node in self.nodemap:

873

if node in self.nodemap:

869

return node

874

return node

870

875

871

n = self.count()

876

n = self.count()

872

t = n - 1

877

t = n - 1

873

878

874

if n:

879

if n:

875

base = self.base(t)

880

base = self.base(t)

876

start = self.start(base)

881

start = self.start(base)

877

end = self.end(t)

882

end = self.end(t)

878

if not d:

883

if not d:

879

prev = self.revision(self.tip())

884

prev = self.revision(self.tip())

880

d = self.diff(prev, str(text))

885

d = self.diff(prev, str(text))

881

data = compress(d)

886

data = compress(d)

882

l = len(data[1]) + len(data[0])

887

l = len(data[1]) + len(data[0])

883

dist = end - start + l

888

dist = end - start + l

884

889

885

# full versions are inserted when the needed deltas

890

# full versions are inserted when the needed deltas

886

# become comparable to the uncompressed text

891

# become comparable to the uncompressed text

887

if not n or dist > len(text) * 2:

892

if not n or dist > len(text) * 2:

888

data = compress(text)

893

data = compress(text)

889

l = len(data[1]) + len(data[0])

894

l = len(data[1]) + len(data[0])

890

base = n

895

base = n

891

else:

896

else:

892

base = self.base(t)

897

base = self.base(t)

893

898

894

offset = 0

899

offset = 0

895

if t >= 0:

900

if t >= 0:

896

offset = self.end(t)

901

offset = self.end(t)

897

902

898

if self.version == 0:

903

if self.version == 0:

899

e = (offset, l, base, link, p1, p2, node)

904

e = (offset, l, base, link, p1, p2, node)

900

else:

905

else:

901

e = (self.offset_type(offset, 0), l, len(text),

906

e = (self.offset_type(offset, 0), l, len(text),

902

base, link, self.rev(p1), self.rev(p2), node)

907

base, link, self.rev(p1), self.rev(p2), node)

903

908

904

self.index.append(e)

909

self.index.append(e)

905

self.nodemap[node] = n

910

self.nodemap[node] = n

906

entry = struct.pack(self.indexformat, *e)

911

entry = struct.pack(self.indexformat, *e)

907

912

908

if not self.inlinedata():

913

if not self.inlinedata():

909

transaction.add(self.datafile, offset)

914

transaction.add(self.datafile, offset)

910

transaction.add(self.indexfile, n * len(entry))

915

transaction.add(self.indexfile, n * len(entry))

911

f = self.opener(self.datafile, "a")

916

f = self.opener(self.datafile, "a")

912

if data[0]:

917

if data[0]:

913

f.write(data[0])

918

f.write(data[0])

914

f.write(data[1])

919

f.write(data[1])

915

f = self.opener(self.indexfile, "a")

920

f = self.opener(self.indexfile, "a")

916

else:

921

else:

917

f = self.opener(self.indexfile, "a+")

922

f = self.opener(self.indexfile, "a+")

918

f.seek(0, 2)

923

f.seek(0, 2)

919

transaction.add(self.indexfile, f.tell())

924

transaction.add(self.indexfile, f.tell())

920

925

921

if len(self.index) == 1 and self.version != 0:

926

if len(self.index) == 1 and self.version != 0:

922

l = struct.pack(versionformat, self.version)

927

l = struct.pack(versionformat, self.version)

923

f.write(l)

928

f.write(l)

924

entry = entry[4:]

929

entry = entry[4:]

925

930

926

f.write(entry)

931

f.write(entry)

927

932

928

if self.inlinedata():

933

if self.inlinedata():

929

f.write(data[0])

934

f.write(data[0])

930

f.write(data[1])

935

f.write(data[1])

931

self.checkinlinesize(transaction, f)

936

self.checkinlinesize(transaction, f)

932

937

933

self.cache = (node, n, text)

938

self.cache = (node, n, text)

934

return node

939

return node

935

940

936

def ancestor(self, a, b):

941

def ancestor(self, a, b):

937

"""calculate the least common ancestor of nodes a and b"""

942

"""calculate the least common ancestor of nodes a and b"""

938

# calculate the distance of every node from root

943

# calculate the distance of every node from root

939

dist = {nullid: 0}

944

dist = {nullid: 0}

940

for i in xrange(self.count()):

945

for i in xrange(self.count()):

941

n = self.node(i)

946

n = self.node(i)

942

p1, p2 = self.parents(n)

947

p1, p2 = self.parents(n)

943

dist[n] = max(dist[p1], dist[p2]) + 1

948

dist[n] = max(dist[p1], dist[p2]) + 1

944

949

945

# traverse ancestors in order of decreasing distance from root

950

# traverse ancestors in order of decreasing distance from root

946

def ancestors(node):

951

def ancestors(node):

947

# we store negative distances because heap returns smallest member

952

# we store negative distances because heap returns smallest member

948

h = [(-dist[node], node)]

953

h = [(-dist[node], node)]

949

seen = {}

954

seen = {}

950

while h:

955

while h:

951

d, n = heapq.heappop(h)

956

d, n = heapq.heappop(h)

952

if n not in seen:

957

if n not in seen:

953

seen[n] = 1

958

seen[n] = 1

954

yield (-d, n)

959

yield (-d, n)

955

for p in self.parents(n):

960

for p in self.parents(n):

956

heapq.heappush(h, (-dist[p], p))

961

heapq.heappush(h, (-dist[p], p))

957

962

958

def generations(node):

963

def generations(node):

959

sg, s = None, {}

964

sg, s = None, {}

960

for g,n in ancestors(node):

965

for g,n in ancestors(node):

961

if g != sg:

966

if g != sg:

962

if sg:

967

if sg:

963

yield sg, s

968

yield sg, s

964

sg, s = g, {n:1}

969

sg, s = g, {n:1}

965

else:

970

else:

966

s[n] = 1

971

s[n] = 1

967

yield sg, s

972

yield sg, s

968

973

969

x = generations(a)

974

x = generations(a)

970

y = generations(b)

975

y = generations(b)

971

gx = x.next()

976

gx = x.next()

972

gy = y.next()

977

gy = y.next()

973

978

974

# increment each ancestor list until it is closer to root than

979

# increment each ancestor list until it is closer to root than

975

# the other, or they match

980

# the other, or they match

976

while 1:

981

while 1:

977

#print "ancestor gen %s %s" % (gx[0], gy[0])

982

#print "ancestor gen %s %s" % (gx[0], gy[0])

978

if gx[0] == gy[0]:

983

if gx[0] == gy[0]:

979

# find the intersection

984

# find the intersection

980

i = [ n for n in gx[1] if n in gy[1] ]

985

i = [ n for n in gx[1] if n in gy[1] ]

981

if i:

986

if i:

982

return i[0]

987

return i[0]

983

else:

988

else:

984

#print "next"

989

#print "next"

985

gy = y.next()

990

gy = y.next()

986

gx = x.next()

991

gx = x.next()

987

elif gx[0] < gy[0]:

992

elif gx[0] < gy[0]:

988

#print "next y"

993

#print "next y"

989

gy = y.next()

994

gy = y.next()

990

else:

995

else:

991

#print "next x"

996

#print "next x"

992

gx = x.next()

997

gx = x.next()

993

998

994

def group(self, nodelist, lookup, infocollect=None):

999

def group(self, nodelist, lookup, infocollect=None):

995

"""calculate a delta group

1000

"""calculate a delta group

996

1001

997

Given a list of changeset revs, return a set of deltas and

1002

Given a list of changeset revs, return a set of deltas and

998

metadata corresponding to nodes. the first delta is

1003

metadata corresponding to nodes. the first delta is

999

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

1004

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

1000

have this parent as it has all history before these

1005

have this parent as it has all history before these

1001

changesets. parent is parent[0]

1006

changesets. parent is parent[0]

1002

"""

1007

"""

1003

revs = [self.rev(n) for n in nodelist]

1008

revs = [self.rev(n) for n in nodelist]

1004

1009

1005

# if we don't have any revisions touched by these changesets, bail

1010

# if we don't have any revisions touched by these changesets, bail

1006

if not revs:

1011

if not revs:

1007

yield changegroup.closechunk()

1012

yield changegroup.closechunk()

1008

return

1013

return

1009

1014

1010

# add the parent of the first rev

1015

# add the parent of the first rev

1011

p = self.parents(self.node(revs[0]))[0]

1016

p = self.parents(self.node(revs[0]))[0]

1012

revs.insert(0, self.rev(p))

1017

revs.insert(0, self.rev(p))

1013

1018

1014

# build deltas

1019

# build deltas

1015

for d in xrange(0, len(revs) - 1):

1020

for d in xrange(0, len(revs) - 1):

1016

a, b = revs[d], revs[d + 1]

1021

a, b = revs[d], revs[d + 1]

1017

nb = self.node(b)

1022

nb = self.node(b)

1018

1023

1019

if infocollect is not None:

1024

if infocollect is not None:

1020

infocollect(nb)

1025

infocollect(nb)

1021

1026

1022

d = self.revdiff(a, b)

1027

d = self.revdiff(a, b)

1023

p = self.parents(nb)

1028

p = self.parents(nb)

1024

meta = nb + p[0] + p[1] + lookup(nb)

1029

meta = nb + p[0] + p[1] + lookup(nb)

1025

yield changegroup.genchunk("%s%s" % (meta, d))

1030

yield changegroup.genchunk("%s%s" % (meta, d))

1026

1031

1027

yield changegroup.closechunk()

1032

yield changegroup.closechunk()

1028

1033

1029

def addgroup(self, revs, linkmapper, transaction, unique=0):

1034

def addgroup(self, revs, linkmapper, transaction, unique=0):

1030

"""

1035

"""

1031

add a delta group

1036

add a delta group

1032

1037

1033

given a set of deltas, add them to the revision log. the

1038

given a set of deltas, add them to the revision log. the

1034

first delta is against its parent, which should be in our

1039

first delta is against its parent, which should be in our

1035

log, the rest are against the previous delta.

1040

log, the rest are against the previous delta.

1036

"""

1041

"""

1037

1042

1038

#track the base of the current delta log

1043

#track the base of the current delta log

1039

r = self.count()

1044

r = self.count()

1040

t = r - 1

1045

t = r - 1

1041

node = None

1046

node = None

1042

1047

1043

base = prev = -1

1048

base = prev = -1

1044

start = end = textlen = 0

1049

start = end = textlen = 0

1045

if r:

1050

if r:

1046

end = self.end(t)

1051

end = self.end(t)

1047

1052

1048

ifh = self.opener(self.indexfile, "a+")

1053

ifh = self.opener(self.indexfile, "a+")

1049

ifh.seek(0, 2)

1054

ifh.seek(0, 2)

1050

transaction.add(self.indexfile, ifh.tell())

1055

transaction.add(self.indexfile, ifh.tell())

1051

if self.inlinedata():

1056

if self.inlinedata():

1052

dfh = None

1057

dfh = None

1053

else:

1058

else:

1054

transaction.add(self.datafile, end)

1059

transaction.add(self.datafile, end)

1055

dfh = self.opener(self.datafile, "a")

1060

dfh = self.opener(self.datafile, "a")

1056

1061

1057

# loop through our set of deltas

1062

# loop through our set of deltas

1058

chain = None

1063

chain = None

1059

for chunk in revs:

1064

for chunk in revs:

1060

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

1065

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

1061

link = linkmapper(cs)

1066

link = linkmapper(cs)

1062

if node in self.nodemap:

1067

if node in self.nodemap:

1063

# this can happen if two branches make the same change

1068

# this can happen if two branches make the same change

1064

# if unique:

1069

# if unique:

1065

# raise RevlogError(_("already have %s") % hex(node[:4]))

1070

# raise RevlogError(_("already have %s") % hex(node[:4]))

1066

chain = node

1071

chain = node

1067

continue

1072

continue

1068

delta = chunk[80:]

1073

delta = chunk[80:]

1069

1074

1070

for p in (p1, p2):

1075

for p in (p1, p2):

1071

if not p in self.nodemap:

1076

if not p in self.nodemap:

1072

raise RevlogError(_("unknown parent %s") % short(p1))

1077

raise RevlogError(_("unknown parent %s") % short(p1))

1073

1078

1074

if not chain:

1079

if not chain:

1075

# retrieve the parent revision of the delta chain

1080

# retrieve the parent revision of the delta chain

1076

chain = p1

1081

chain = p1

1077

if not chain in self.nodemap:

1082

if not chain in self.nodemap:

1078

raise RevlogError(_("unknown base %s") % short(chain[:4]))

1083

raise RevlogError(_("unknown base %s") % short(chain[:4]))

1079

1084

1080

# full versions are inserted when the needed deltas become

1085

# full versions are inserted when the needed deltas become

1081

# comparable to the uncompressed text or when the previous

1086

# comparable to the uncompressed text or when the previous

1082

# version is not the one we have a delta against. We use

1087

# version is not the one we have a delta against. We use

1083

# the size of the previous full rev as a proxy for the

1088

# the size of the previous full rev as a proxy for the

1084

# current size.

1089

# current size.

1085

1090

1086

if chain == prev:

1091

if chain == prev:

1087

tempd = compress(delta)

1092

tempd = compress(delta)

1088

cdelta = tempd[0] + tempd[1]

1093

cdelta = tempd[0] + tempd[1]

1089

textlen = mdiff.patchedsize(textlen, delta)

1094

textlen = mdiff.patchedsize(textlen, delta)

1090

1095

1091

if chain != prev or (end - start + len(cdelta)) > textlen * 2:

1096

if chain != prev or (end - start + len(cdelta)) > textlen * 2:

1092

# flush our writes here so we can read it in revision

1097

# flush our writes here so we can read it in revision

1093

if dfh:

1098

if dfh:

1094

dfh.flush()

1099

dfh.flush()

1095

ifh.flush()

1100

ifh.flush()

1096

text = self.revision(chain)

1101

text = self.revision(chain)

1097

text = self.patches(text, [delta])

1102

text = self.patches(text, [delta])

1098

chk = self.addrevision(text, transaction, link, p1, p2)

1103

chk = self.addrevision(text, transaction, link, p1, p2)

1099

if chk != node:

1104

if chk != node:

1100

raise RevlogError(_("consistency error adding group"))

1105

raise RevlogError(_("consistency error adding group"))

1101

textlen = len(text)

1106

textlen = len(text)

1102

else:

1107

else:

1103

if self.version == 0:

1108

if self.version == 0:

1104

e = (end, len(cdelta), base, link, p1, p2, node)

1109

e = (end, len(cdelta), base, link, p1, p2, node)

1105

else:

1110

else:

1106

e = (self.offset_type(end, 0), len(cdelta), textlen, base,

1111

e = (self.offset_type(end, 0), len(cdelta), textlen, base,

1107

link, self.rev(p1), self.rev(p2), node)

1112

link, self.rev(p1), self.rev(p2), node)

1108

self.index.append(e)

1113

self.index.append(e)

1109

self.nodemap[node] = r

1114

self.nodemap[node] = r

1110

if self.inlinedata():

1115

if self.inlinedata():

1111

ifh.write(struct.pack(self.indexformat, *e))

1116

ifh.write(struct.pack(self.indexformat, *e))

1112

ifh.write(cdelta)

1117

ifh.write(cdelta)

1113

self.checkinlinesize(transaction, ifh)

1118

self.checkinlinesize(transaction, ifh)

1114

if not self.inlinedata():

1119

if not self.inlinedata():

1115

dfh = self.opener(self.datafile, "a")

1120

dfh = self.opener(self.datafile, "a")

1116

ifh = self.opener(self.indexfile, "a")

1121

ifh = self.opener(self.indexfile, "a")

1117

else:

1122

else:

1118

if not dfh:

1123

if not dfh:

1119

# addrevision switched from inline to conventional

1124

# addrevision switched from inline to conventional

1120

# reopen the index

1125

# reopen the index

1121

dfh = self.opener(self.datafile, "a")

1126

dfh = self.opener(self.datafile, "a")

1122

ifh = self.opener(self.indexfile, "a")

1127

ifh = self.opener(self.indexfile, "a")

1123

dfh.write(cdelta)

1128

dfh.write(cdelta)

1124

ifh.write(struct.pack(self.indexformat, *e))

1129

ifh.write(struct.pack(self.indexformat, *e))

1125

1130

1126

t, r, chain, prev = r, r + 1, node, node

1131

t, r, chain, prev = r, r + 1, node, node

1127

base = self.base(t)

1132

base = self.base(t)

1128

start = self.start(base)

1133

start = self.start(base)

1129

end = self.end(t)

1134

end = self.end(t)

1130

1135

1131

if node is None:

1136

if node is None:

1132

raise RevlogError(_("group to be added is empty"))

1137

raise RevlogError(_("group to be added is empty"))

1133

return node

1138

return node

1134

1139

1135

def strip(self, rev, minlink):

1140

def strip(self, rev, minlink):

1136

if self.count() == 0 or rev >= self.count():

1141

if self.count() == 0 or rev >= self.count():

1137

return

1142

return

1138

1143

1139

if isinstance(self.index, lazyindex):

1144

if isinstance(self.index, lazyindex):

1140

self.loadindexmap()

1145

self.loadindexmap()

1141

1146

1142

# When stripping away a revision, we need to make sure it

1147

# When stripping away a revision, we need to make sure it

1143

# does not actually belong to an older changeset.

1148

# does not actually belong to an older changeset.

1144

# The minlink parameter defines the oldest revision

1149

# The minlink parameter defines the oldest revision

1145

# we're allowed to strip away.

1150

# we're allowed to strip away.

1146

while minlink > self.index[rev][-4]:

1151

while minlink > self.index[rev][-4]:

1147

rev += 1

1152

rev += 1

1148

if rev >= self.count():

1153

if rev >= self.count():

1149

return

1154

return

1150

1155

1151

# first truncate the files on disk

1156

# first truncate the files on disk

1152

end = self.start(rev)

1157

end = self.start(rev)

1153

if not self.inlinedata():

1158

if not self.inlinedata():

1154

df = self.opener(self.datafile, "a")

1159

df = self.opener(self.datafile, "a")

1155

df.truncate(end)

1160

df.truncate(end)

1156

end = rev * struct.calcsize(self.indexformat)

1161

end = rev * struct.calcsize(self.indexformat)

1157

else:

1162

else:

1158

end += rev * struct.calcsize(self.indexformat)

1163

end += rev * struct.calcsize(self.indexformat)

1159

1164

1160

indexf = self.opener(self.indexfile, "a")

1165

indexf = self.opener(self.indexfile, "a")

1161

indexf.truncate(end)

1166

indexf.truncate(end)

1162

1167

1163

# then reset internal state in memory to forget those revisions

1168

# then reset internal state in memory to forget those revisions

1164

self.cache = None

1169

self.cache = None

1165

self.chunkcache = None

1170

self.chunkcache = None

1166

for x in xrange(rev, self.count()):

1171

for x in xrange(rev, self.count()):

1167

del self.nodemap[self.node(x)]

1172

del self.nodemap[self.node(x)]

1168

1173

1169

del self.index[rev:]

1174

del self.index[rev:]

1170

1175

1171

def checksize(self):

1176

def checksize(self):

1172

expected = 0

1177

expected = 0

1173

if self.count():

1178

if self.count():

1174

expected = self.end(self.count() - 1)

1179

expected = self.end(self.count() - 1)

1175

1180

1176

try:

1181

try:

1177

f = self.opener(self.datafile)

1182

f = self.opener(self.datafile)

1178

f.seek(0, 2)

1183

f.seek(0, 2)

1179

actual = f.tell()

1184

actual = f.tell()

1180

dd = actual - expected

1185

dd = actual - expected

1181

except IOError, inst:

1186

except IOError, inst:

1182

if inst.errno != errno.ENOENT:

1187

if inst.errno != errno.ENOENT:

1183

raise

1188

raise

1184

dd = 0

1189

dd = 0

1185

1190

1186

try:

1191

try:

1187

f = self.opener(self.indexfile)

1192

f = self.opener(self.indexfile)

1188

f.seek(0, 2)

1193

f.seek(0, 2)

1189

actual = f.tell()

1194

actual = f.tell()

1190

s = struct.calcsize(self.indexformat)

1195

s = struct.calcsize(self.indexformat)

1191

i = actual / s

1196

i = actual / s

1192

di = actual - (i * s)

1197

di = actual - (i * s)

1193

if self.inlinedata():

1198

if self.inlinedata():

1194

databytes = 0

1199

databytes = 0

1195

for r in xrange(self.count()):

1200

for r in xrange(self.count()):

1196

databytes += self.length(r)

1201

databytes += self.length(r)

1197

dd = 0

1202

dd = 0

1198

di = actual - self.count() * s - databytes

1203

di = actual - self.count() * s - databytes

1199

except IOError, inst:

1204

except IOError, inst:

1200

if inst.errno != errno.ENOENT:

1205

if inst.errno != errno.ENOENT:

1201

raise

1206

raise

1202

di = 0

1207

di = 0

1203

1208

1204

return (dd, di)

1209

return (dd, di)

1205

1210

1206

1211

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """
             revlog.py - storage back-end for mercurial
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches
             Copyright 2005 Matt Mackall <mpm@selenic.com>
             This software may be used and distributed according to the terms
             of the GNU General Public License, incorporated herein by reference.
             """
             from node import *
             from i18n import gettext as _
             from demandload import demandload
             demandload(globals(), "binascii changegroup errno heapq mdiff os")
             demandload(globals(), "sha struct zlib")
             # revlog version strings
             REVLOGV0 = 0
             REVLOGNG = 1
             # revlog flags
             REVLOGNGINLINEDATA = (1 << 16)
             def flagstr(flag):
                 if flag == "inline":
                     return REVLOGNGINLINEDATA
                 raise RevlogError(_("unknown revlog flag %s" % flag))
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 l = [p1, p2]
                 l.sort()
                 s = sha.new(l[0])
                 s.update(l[1])
                 s.update(text)
                 return s.digest()
             def compress(text):
                 """ generate a possibly-compressed representation of text """
                 if not text: return ("", text)
                 if len(text) < 44:
                     if text[0] == '\0': return ("", text)
                     return ('u', text)
                 bin = zlib.compress(text)
                 if len(bin) > len(text):
                     if text[0] == '\0': return ("", text)
                     return ('u', text)
                 return ("", bin)
             def decompress(bin):
                 """ decompress the given input """
                 if not bin: return bin
                 t = bin[0]
                 if t == '\0': return bin
                 if t == 'x': return zlib.decompress(bin)
                 if t == 'u': return bin[1:]
                 raise RevlogError(_("unknown compression type %r") % t)
             indexformatv0 = ">4l20s20s20s"
             v0shaoffset = 56
             # index ng:
             # 6 bytes offset
             # 2 bytes flags
             # 4 bytes compressed length
             # 4 bytes uncompressed length
             # 4 bytes: base rev
             # 4 bytes link rev
             # 4 bytes parent 1 rev
             # 4 bytes parent 2 rev
             # 32 bytes: nodeid
             indexformatng = ">Qiiiiii20s12x"
             ngshaoffset = 32
             versionformat = ">i"
             class lazyparser(object):
                 """
                 this class avoids the need to parse the entirety of large indices
                 """
                 def __init__(self, dataf, size, indexformat, shaoffset):
                     self.dataf = dataf
                     self.format = indexformat
                     self.s = struct.calcsize(indexformat)
                     self.indexformat = indexformat
                     self.datasize = size
                     self.l = size/self.s
                     self.index = [None] * self.l
                     self.map = {nullid: -1}
                     self.allmap = 0
                     self.all = 0
                     self.mapfind_count = 0
                     self.shaoffset = shaoffset
                 def loadmap(self):
                     """
                     during a commit, we need to make sure the rev being added is
                     not a duplicate.  This requires loading the entire index,
                     which is fairly slow.  loadmap can load up just the node map,
                     which takes much less time.
                     """
                     if self.allmap: return
                     start = 0
                     end = self.datasize
                     self.allmap = 1
                     cur = 0
                     count = 0
                     blocksize = self.s * 256
                     self.dataf.seek(0)
                     while cur < end:
                         data = self.dataf.read(blocksize)
                         off = 0
                         for x in xrange(256):
                             n = data[off + self.shaoffset:off + self.shaoffset + 20]
                             self.map[n] = count
                             count += 1
                             if count >= self.l:
                                 break
                             off += self.s
                         cur += blocksize
                 def loadblock(self, blockstart, blocksize, data=None):
                     if self.all: return
                     if data is None:
                         self.dataf.seek(blockstart)
                         data = self.dataf.read(blocksize)
                     lend = len(data) / self.s
                     i = blockstart / self.s
                     off = 0
                     for x in xrange(lend):
                         if self.index[i + x] == None:
                             b = data[off : off + self.s]
-                            e = struct.unpack(self.format, b)
+                            self.index[i + x] = b
-                            self.index[i + x] = e
+                            n = b[self.shaoffset:self.shaoffset + 20]
-                            self.map[e[-1]] = i + x
+                            self.map[n] = i + x
                         off += self.s
                 def findnode(self, node):
                     """search backwards through the index file for a specific node"""
                     if self.allmap: return None
                     # hg log will cause many many searches for the manifest
                     # nodes.  After we get called a few times, just load the whole
                     # thing.
                     if self.mapfind_count > 8:
                         self.loadmap()
                         if node in self.map:
                             return node
                         return None
                     self.mapfind_count += 1
                     last = self.l - 1
                     while self.index[last] != None:
                         if last == 0:
                             self.all = 1
                             self.allmap = 1
                             return None
                         last -= 1
                     end = (last + 1) * self.s
                     blocksize = self.s * 256
                     while end >= 0:
                         start = max(end - blocksize, 0)
                         self.dataf.seek(start)
                         data = self.dataf.read(end - start)
                         findend = end - start
                         while True:
                             # we're searching backwards, so weh have to make sure
                             # we don't find a changeset where this node is a parent
                             off = data.rfind(node, 0, findend)
                             findend = off
                             if off >= 0:
                                 i = off / self.s
                                 off = i * self.s
                                 n = data[off + self.shaoffset:off + self.shaoffset + 20]
                                 if n == node:
                                     self.map[n] = i + start / self.s
                                     return node
                             else:
                                 break
                         end -= blocksize
                     return None
                 def loadindex(self, i=None, end=None):
                     if self.all: return
                     all = False
                     if i == None:
                         blockstart = 0
                         blocksize = (512 / self.s) * self.s
                         end = self.datasize
                         all = True
                     else:
                         if end:
                             blockstart = i * self.s
                             end = end * self.s
                             blocksize = end - blockstart
                         else:
                             blockstart = (i & ~(32)) * self.s
                             blocksize = self.s * 64
                             end = blockstart + blocksize
                     while blockstart < end:
                         self.loadblock(blockstart, blocksize)
                         blockstart += blocksize
                     if all: self.all = True
             class lazyindex(object):
                 """a lazy version of the index array"""
                 def __init__(self, parser):
                     self.p = parser
                 def __len__(self):
                     return len(self.p.index)
                 def load(self, pos):
                     if pos < 0:
                         pos += len(self.p.index)
                     self.p.loadindex(pos)
                     return self.p.index[pos]
                 def __getitem__(self, pos):
-                    return self.p.index[pos] or self.load(pos)
+                    ret = self.p.index[pos] or self.load(pos)
+                    if isinstance(ret, str):
+                        ret = struct.unpack(self.p.indexformat, ret)
+                    return ret
                 def __setitem__(self, pos, item):
                     self.p.index[pos] = item
                 def __delitem__(self, pos):
                     del self.p.index[pos]
                 def append(self, e):
                     self.p.index.append(e)
             class lazymap(object):
                 """a lazy version of the node map"""
                 def __init__(self, parser):
                     self.p = parser
                 def load(self, key):
                     n = self.p.findnode(key)
                     if n == None:
                         raise KeyError(key)
                 def __contains__(self, key):
                     if key in self.p.map:
                         return True
                     self.p.loadmap()
                     return key in self.p.map
                 def __iter__(self):
                     yield nullid
                     for i in xrange(self.p.l):
-                        try:
+                        ret = self.p.index[i]
-                            yield self.p.index[i][-1]
+                        if not ret:
-                        except:
                             self.p.loadindex(i)
-                            yield self.p.index[i][-1]
+                            ret = self.p.index[i]
+                        if isinstance(ret, str):
+                            ret = struct.unpack(self.p.indexformat, ret)
+                        yield ret[-1]
                 def __getitem__(self, key):
                     try:
                         return self.p.map[key]
                     except KeyError:
                         try:
                             self.load(key)
                             return self.p.map[key]
                         except KeyError:
                             raise KeyError("node " + hex(key))
                 def __setitem__(self, key, val):
                     self.p.map[key] = val
                 def __delitem__(self, key):
                     del self.p.map[key]
             class RevlogError(Exception): pass
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, includings its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 """
                 def __init__(self, opener, indexfile, datafile, defversion=0):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = datafile
                     self.opener = opener
                     self.indexstat = None
                     self.cache = None
                     self.chunkcache = None
                     self.defversion = defversion
                     self.load()
                 def load(self):
                     v = self.defversion
                     try:
                         f = self.opener(self.indexfile)
                         i = f.read(4)
                         f.seek(0)
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         i = ""
                     else:
                         try:
                             st = os.fstat(f.fileno())
                         except AttributeError, inst:
                             st = None
                         else:
                             oldst = self.indexstat
                             if (oldst and st.st_dev == oldst.st_dev
                                 and st.st_ino == oldst.st_ino
                                 and st.st_mtime == oldst.st_mtime
                                 and st.st_ctime == oldst.st_ctime):
                                 return
                             self.indexstat = st
                             if len(i) > 0:
                                 v = struct.unpack(versionformat, i)[0]
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == 0:
                         if flags:
                             raise RevlogError(_("index %s invalid flags %x for format v0" %
                                                (self.indexfile, flags)))
                     elif fmt == REVLOGNG:
                         if flags & ~REVLOGNGINLINEDATA:
                             raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                                (self.indexfile, flags)))
                     else:
                         raise RevlogError(_("index %s invalid format %d" %
                                            (self.indexfile, fmt)))
                     self.version = v
                     if v == 0:
                         self.indexformat = indexformatv0
                         shaoffset = v0shaoffset
                     else:
                         self.indexformat = indexformatng
                         shaoffset = ngshaoffset
                     if i:
                         if not self.inlinedata() and st and st.st_size > 10000:
                             # big index, let's parse it on demand
                             parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                             self.index = lazyindex(parser)
                             self.nodemap = lazymap(parser)
                         else:
                             i = f.read()
                             self.parseindex(i)
                         if self.inlinedata():
                             # we've already got the entire data file read in, save it
                             # in the chunk data
                             self.chunkcache = (0, i)
                         if self.version != 0:
                             e = list(self.index[0])
                             type = self.ngtype(e[0])
                             e[0] = self.offset_type(0, type)
                             self.index[0] = e
                     else:
                         self.nodemap = { nullid: -1}
                         self.index = []
                 def parseindex(self, data):
                     s = struct.calcsize(self.indexformat)
                     l = len(data)
                     self.index = []
                     self.nodemap =  {nullid: -1}
                     inline = self.inlinedata()
                     off = 0
                     n = 0
                     while off < l:
                         e = struct.unpack(self.indexformat, data[off:off + s])
                         self.index.append(e)
                         self.nodemap[e[-1]] = n
                         n += 1
                         off += s
                         if inline:
                             off += e[1]
                 def ngoffset(self, q):
                     if q & 0xFFFF:
                         raise RevlogError(_('%s: incompatible revision flag %x') %
                                            (self.indexfile, type))
                     return long(q >> 16)
                 def ngtype(self, q):
                     return int(q & 0xFFFF)
                 def offset_type(self, offset, type):
                     return long(long(offset) << 16 | type)
                 def loadindex(self, start, end):
                     """load a block of indexes all at once from the lazy parser"""
                     if isinstance(self.index, lazyindex):
                         self.index.p.loadindex(start, end)
                 def loadindexmap(self):
                     """loads both the map and the index from the lazy parser"""
                     if isinstance(self.index, lazyindex):
                         p = self.index.p
                         p.loadindex()
                         self.nodemap = p.map
                 def loadmap(self):
                     """loads the map from the lazy parser"""
                     if isinstance(self.nodemap, lazymap):
                         self.nodemap.p.loadmap()
                         self.nodemap = self.nodemap.p.map
                 def inlinedata(self): return self.version & REVLOGNGINLINEDATA
                 def tip(self): return self.node(len(self.index) - 1)
                 def count(self): return len(self.index)
                 def node(self, rev):
                     return (rev < 0) and nullid or self.index[rev][-1]
                 def rev(self, node):
                     try:
                         return self.nodemap[node]
                     except KeyError:
                         raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
                 def linkrev(self, node): return self.index[self.rev(node)][-4]
                 def parents(self, node):
                     if node == nullid: return (nullid, nullid)
                     r = self.rev(node)
                     d = self.index[r][-3:-1]
                     if self.version == 0:
                         return d
                     return [ self.node(x) for x in d ]
                 def start(self, rev):
                     if rev < 0:
                         return -1
                     if self.version != 0:
                         return self.ngoffset(self.index[rev][0])
                     return self.index[rev][0]
                 def end(self, rev): return self.start(rev) + self.length(rev)
                 def size(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     l = -1
                     if self.version != 0:
                         l = self.index[rev][2]
                     if l >= 0:
                         return l
                     t = self.revision(self.node(rev))
                     return len(t)
                     # alternate implementation, The advantage to this code is it
                     # will be faster for a single revision.  But, the results are not
                     # cached, so finding the size of every revision will be slower.
                     """
                     if self.cache and self.cache[1] == rev:
                         return len(self.cache[2])
                     base = self.base(rev)
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         base = self.cache[1]
                         text = self.cache[2]
                     else:
                         text = self.revision(self.node(base))
                     l = len(text)
                     for x in xrange(base + 1, rev + 1):
                         l = mdiff.patchedsize(l, self.chunk(x))
                     return l
                     """
                 def length(self, rev):
                     if rev < 0:
                         return 0
                     else:
                         return self.index[rev][1]
                 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
                 def reachable(self, rev, stop=None):
                     reachable = {}
                     visit = [rev]
                     reachable[rev] = 1
                     if stop:
                         stopn = self.rev(stop)
                     else:
                         stopn = 0
                     while visit:
                         n = visit.pop(0)
                         if n == stop:
                             continue
                         if n == nullid:
                             continue
                         for p in self.parents(n):
                             if self.rev(p) < stopn:
                                 continue
                             if p not in reachable:
                                 reachable[p] = 1
                                 visit.append(p)
                     return reachable
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a tuple containing three elements. Elements 1 and 2 contain
                     a final list bases and heads after all the unreachable ones have been
                     pruned.  Element 0 contains a topologically sorted list of all
                     nodes that satisfy these constraints:
 . All nodes must be descended from a node in roots (the nodes on
                        roots are considered descended from themselves).
 . All nodes must also be ancestors of a node in heads (the nodes in
                        heads are considered to be their own ancestors).
                     If roots is unspecified, nullid is assumed as the only root.
                     If heads is unspecified, it is taken to be the output of the
                     heads method (i.e. a list of all nodes in the repository that
                     have no children)."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendent of nullid
                         lowestrev = -1
                     if (lowestrev == -1) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in xrange(0, self.count())],
                                 [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = self.count() - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = {}
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = heads[:]
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, 0)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendent of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors[n] = 1 # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.extend([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > -1:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [n for n in roots if n in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(n) for n in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = -1
                             roots = [nullid]
                     # Transform our roots list into a 'set' (i.e. a dictionary where the
                     # values don't matter.
                     descendents = dict.fromkeys(roots, 1)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendents.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descedents, empty parents will look like
                     # they're descendents.
                     for r in xrange(max(lowestrev, 0), highestrev + 1):
                         n = self.node(r)
                         isdescendent = False
                         if lowestrev == -1:  # Everybody is a descendent of nullid
                             isdescendent = True
                         elif n in descendents:
                             # n is already a descendent
                             isdescendent = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendents before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendents, it's not a root.
                                 if (p[0] in descendents) or (p[1] in descendents):
                                     roots.pop(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendent if either of its parents are
                             # descendents.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendents) or (p[1] in descendents):
                                 descendents[n] = 1
                                 isdescendent = True
                         if isdescendent and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendents and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = 1
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = 1
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [n for n in heads.iterkeys() if heads[n] != 0]
                     roots = roots.keys()
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def heads(self, start=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     """
                     if start is None:
                         start = nullid
                     reachable = {start: 1}
                     heads = {start: 1}
                     startrev = self.rev(start)
                     for r in xrange(startrev + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn in reachable:
                                 reachable[n] = 1
                                 heads[n] = 1
                             if pn in heads:
                                 del heads[pn]
                     return heads.keys()
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in range(p + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn == node:
                                 c.append(n)
                                 continue
                             elif pn == nullid:
                                 continue
                     return c
                 def lookup(self, id):
                     """locate a node based on revision number or subset of hex nodeid"""
                     try:
                         rev = int(id)
                         if str(rev) != id: raise ValueError
                         if rev < 0: rev = self.count() + rev
                         if rev < 0 or rev >= self.count(): raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         c = []
                         for n in self.nodemap:
                             if hex(n).startswith(id):
                                 c.append(n)
                         if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
                         if len(c) < 1: raise RevlogError(_("No match found"))
                         return c[0]
                     return None
                 def diff(self, a, b):
                     """return a delta between two revisions"""
                     return mdiff.textdiff(a, b)
                 def patches(self, t, pl):
                     """apply a list of patches to a string"""
                     return mdiff.patches(t, pl)
                 def chunk(self, rev, df=None, cachelen=4096):
                     start, length = self.start(rev), self.length(rev)
                     inline = self.inlinedata()
                     if inline:
                         start += (rev + 1) * struct.calcsize(self.indexformat)
                     end = start + length
                     def loadcache(df):
                         cache_length = max(cachelen, length) # 4k
                         if not df:
                             if inline:
                                 df = self.opener(self.indexfile)
                             else:
                                 df = self.opener(self.datafile)
                         df.seek(start)
                         self.chunkcache = (start, df.read(cache_length))
                     if not self.chunkcache:
                         loadcache(df)
                     cache_start = self.chunkcache[0]
                     cache_end = cache_start + len(self.chunkcache[1])
                     if start >= cache_start and end <= cache_end:
                         # it is cached
                         offset = start - cache_start
                     else:
                         loadcache(df)
                         offset = 0
                     #def checkchunk():
                     #    df = self.opener(self.datafile)
                     #    df.seek(start)
                     #    return df.read(length)
                     #assert s == checkchunk()
                     return decompress(self.chunkcache[1][offset:offset + length])
                 def delta(self, node):
                     """return or calculate a delta between a node and its predecessor"""
                     r = self.rev(node)
                     return self.revdiff(r - 1, r)
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions"""
                     b1 = self.base(rev1)
                     b2 = self.base(rev2)
                     if b1 == b2 and rev1 + 1 == rev2:
                         return self.chunk(rev2)
                     else:
                         return self.diff(self.revision(self.node(rev1)),
                                          self.revision(self.node(rev2)))
                 def revision(self, node):
                     """return an uncompressed revision of a given"""
                     if node == nullid: return ""
                     if self.cache and self.cache[0] == node: return self.cache[2]
                     # look up what we need to read
                     text = None
                     rev = self.rev(node)
                     base = self.base(rev)
                     if self.inlinedata():
                         # we probably have the whole chunk cached
                         df = None
                     else:
                         df = self.opener(self.datafile)
                     # do we have useful data cached?
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         base = self.cache[1]
                         text = self.cache[2]
                         self.loadindex(base, rev + 1)
                     else:
                         self.loadindex(base, rev + 1)
                         text = self.chunk(base, df=df)
                     bins = []
                     for r in xrange(base + 1, rev + 1):
                         bins.append(self.chunk(r, df=df))
                     text = self.patches(text, bins)
                     p1, p2 = self.parents(node)
                     if node != hash(text, p1, p2):
                         raise RevlogError(_("integrity check failed on %s:%d")
                                       % (self.datafile, rev))
                     self.cache = (node, rev, text)
                     return text
                 def checkinlinesize(self, tr, fp=None):
                     if not self.inlinedata():
                         return
                     if not fp:
                         fp = self.opener(self.indexfile, 'r')
                     size = fp.tell()
                     if size < 131072:
                         return
                     tr.add(self.datafile, 0)
                     df = self.opener(self.datafile, 'w')
                     calc = struct.calcsize(self.indexformat)
                     for r in xrange(self.count()):
                         start = self.start(r) + (r + 1) * calc
                         length = self.length(r)
                         fp.seek(start)
                         d = fp.read(length)
                         df.write(d)
                     fp.close()
                     df.close()
                     fp = self.opener(self.indexfile, 'w', atomictemp=True)
                     self.version &= ~(REVLOGNGINLINEDATA)
                     if self.count():
                         x = self.index[0]
                         e = struct.pack(self.indexformat, *x)[4:]
                         l = struct.pack(versionformat, self.version)
                         fp.write(l)
                         fp.write(e)
                     for i in xrange(1, self.count()):
                         x = self.index[i]
                         e = struct.pack(self.indexformat, *x)
                         fp.write(e)
                     # if we don't call rename, the temp file will never replace the
                     # real index
                     fp.rename()
                     self.chunkcache = None
                 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     d - an optional precomputed delta
                     """
                     if text is None: text = ""
                     if p1 is None: p1 = self.tip()
                     if p2 is None: p2 = nullid
                     node = hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     n = self.count()
                     t = n - 1
                     if n:
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                         if not d:
                             prev = self.revision(self.tip())
                             d = self.diff(prev, str(text))
                         data = compress(d)
                         l = len(data[1]) + len(data[0])
                         dist = end - start + l
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if not n or dist > len(text) * 2:
                         data = compress(text)
                         l = len(data[1]) + len(data[0])
                         base = n
                     else:
                         base = self.base(t)
                     offset = 0
                     if t >= 0:
                         offset = self.end(t)
                     if self.version == 0:
                         e = (offset, l, base, link, p1, p2, node)
                     else:
                         e = (self.offset_type(offset, 0), l, len(text),
                              base, link, self.rev(p1), self.rev(p2), node)
                     self.index.append(e)
                     self.nodemap[node] = n
                     entry = struct.pack(self.indexformat, *e)
                     if not self.inlinedata():
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, n * len(entry))
                         f = self.opener(self.datafile, "a")
                         if data[0]:
                             f.write(data[0])
                         f.write(data[1])
                         f = self.opener(self.indexfile, "a")
                     else:
                         f = self.opener(self.indexfile, "a+")
                         f.seek(0, 2)
                         transaction.add(self.indexfile, f.tell())
                     if len(self.index) == 1 and self.version != 0:
                         l = struct.pack(versionformat, self.version)
                         f.write(l)
                         entry = entry[4:]
                     f.write(entry)
                     if self.inlinedata():
                         f.write(data[0])
                         f.write(data[1])
                         self.checkinlinesize(transaction, f)
                     self.cache = (node, n, text)
                     return node
                 def ancestor(self, a, b):
                     """calculate the least common ancestor of nodes a and b"""
                     # calculate the distance of every node from root
                     dist = {nullid: 0}
                     for i in xrange(self.count()):
                         n = self.node(i)
                         p1, p2 = self.parents(n)
                         dist[n] = max(dist[p1], dist[p2]) + 1
                     # traverse ancestors in order of decreasing distance from root
                     def ancestors(node):
                         # we store negative distances because heap returns smallest member
                         h = [(-dist[node], node)]
                         seen = {}
                         while h:
                             d, n = heapq.heappop(h)
                             if n not in seen:
                                 seen[n] = 1
                                 yield (-d, n)
                                 for p in self.parents(n):
                                     heapq.heappush(h, (-dist[p], p))
                     def generations(node):
                         sg, s = None, {}
                         for g,n in ancestors(node):
                             if g != sg:
                                 if sg:
                                     yield sg, s
                                 sg, s = g, {n:1}
                             else:
                                 s[n] = 1
                         yield sg, s
                     x = generations(a)
                     y = generations(b)
                     gx = x.next()
                     gy = y.next()
                     # increment each ancestor list until it is closer to root than
                     # the other, or they match
                     while 1:
                         #print "ancestor gen %s %s" % (gx[0], gy[0])
                         if gx[0] == gy[0]:
                             # find the intersection
                             i = [ n for n in gx[1] if n in gy[1] ]
                             if i:
                                 return i[0]
                             else:
                                 #print "next"
                                 gy = y.next()
                                 gx = x.next()
                         elif gx[0] < gy[0]:
                             #print "next y"
                             gy = y.next()
                         else:
                             #print "next x"
                             gx = x.next()
                 def group(self, nodelist, lookup, infocollect=None):
                     """calculate a delta group
                     Given a list of changeset revs, return a set of deltas and
                     metadata corresponding to nodes. the first delta is
                     parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
                     have this parent as it has all history before these
                     changesets. parent is parent[0]
                     """
                     revs = [self.rev(n) for n in nodelist]
                     # if we don't have any revisions touched by these changesets, bail
                     if not revs:
                         yield changegroup.closechunk()
                         return
                     # add the parent of the first rev
                     p = self.parents(self.node(revs[0]))[0]
                     revs.insert(0, self.rev(p))
                     # build deltas
                     for d in xrange(0, len(revs) - 1):
                         a, b = revs[d], revs[d + 1]
                         nb = self.node(b)
                         if infocollect is not None:
                             infocollect(nb)
                         d = self.revdiff(a, b)
                         p = self.parents(nb)
                         meta = nb + p[0] + p[1] + lookup(nb)
                         yield changegroup.genchunk("%s%s" % (meta, d))
                     yield changegroup.closechunk()
                 def addgroup(self, revs, linkmapper, transaction, unique=0):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     """
                     #track the base of the current delta log
                     r = self.count()
                     t = r - 1
                     node = None
                     base = prev = -1
                     start = end = textlen = 0
                     if r:
                         end = self.end(t)
                     ifh = self.opener(self.indexfile, "a+")
                     ifh.seek(0, 2)
                     transaction.add(self.indexfile, ifh.tell())
                     if self.inlinedata():
                         dfh = None
                     else:
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a")
                     # loop through our set of deltas
                     chain = None
                     for chunk in revs:
                         node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                         link = linkmapper(cs)
                         if node in self.nodemap:
                             # this can happen if two branches make the same change
                             # if unique:
                             #    raise RevlogError(_("already have %s") % hex(node[:4]))
                             chain = node
                             continue
                         delta = chunk[80:]
                         for p in (p1, p2):
                             if not p in self.nodemap:
                                 raise RevlogError(_("unknown parent %s") % short(p1))
                         if not chain:
                             # retrieve the parent revision of the delta chain
                             chain = p1
                             if not chain in self.nodemap:
                                 raise RevlogError(_("unknown base %s") % short(chain[:4]))
                         # full versions are inserted when the needed deltas become
                         # comparable to the uncompressed text or when the previous
                         # version is not the one we have a delta against. We use
                         # the size of the previous full rev as a proxy for the
                         # current size.
                         if chain == prev:
                             tempd = compress(delta)
                             cdelta = tempd[0] + tempd[1]
                             textlen = mdiff.patchedsize(textlen, delta)
                         if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                             # flush our writes here so we can read it in revision
                             if dfh:
                                 dfh.flush()
                             ifh.flush()
                             text = self.revision(chain)
                             text = self.patches(text, [delta])
                             chk = self.addrevision(text, transaction, link, p1, p2)
                             if chk != node:
                                 raise RevlogError(_("consistency error adding group"))
                             textlen = len(text)
                         else:
                             if self.version == 0:
                                 e = (end, len(cdelta), base, link, p1, p2, node)
                             else:
                                 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                                      link, self.rev(p1), self.rev(p2), node)
                             self.index.append(e)
                             self.nodemap[node] = r
                             if self.inlinedata():
                                 ifh.write(struct.pack(self.indexformat, *e))
                                 ifh.write(cdelta)
                                 self.checkinlinesize(transaction, ifh)
                                 if not self.inlinedata():
                                     dfh = self.opener(self.datafile, "a")
                                     ifh = self.opener(self.indexfile, "a")
                             else:
                                 if not dfh:
                                     # addrevision switched from inline to conventional
                                     # reopen the index
                                     dfh = self.opener(self.datafile, "a")
                                     ifh = self.opener(self.indexfile, "a")
                                 dfh.write(cdelta)
                                 ifh.write(struct.pack(self.indexformat, *e))
                         t, r, chain, prev = r, r + 1, node, node
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                     if node is None:
                         raise RevlogError(_("group to be added is empty"))
                     return node
                 def strip(self, rev, minlink):
                     if self.count() == 0 or rev >= self.count():
                         return
                     if isinstance(self.index, lazyindex):
                         self.loadindexmap()
                     # When stripping away a revision, we need to make sure it
                     # does not actually belong to an older changeset.
                     # The minlink parameter defines the oldest revision
                     # we're allowed to strip away.
                     while minlink > self.index[rev][-4]:
                         rev += 1
                         if rev >= self.count():
                             return
                     # first truncate the files on disk
                     end = self.start(rev)
                     if not self.inlinedata():
                         df = self.opener(self.datafile, "a")
                         df.truncate(end)
                         end = rev * struct.calcsize(self.indexformat)
                     else:
                         end += rev * struct.calcsize(self.indexformat)
                     indexf = self.opener(self.indexfile, "a")
                     indexf.truncate(end)
                     # then reset internal state in memory to forget those revisions
                     self.cache = None
                     self.chunkcache = None
                     for x in xrange(rev, self.count()):
                         del self.nodemap[self.node(x)]
                     del self.index[rev:]
                 def checksize(self):
                     expected = 0
                     if self.count():
                         expected = self.end(self.count() - 1)
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         dd = actual - expected
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         s = struct.calcsize(self.indexformat)
                         i = actual / s
                         di = actual - (i * s)
                         if self.inlinedata():
                             databytes = 0
                             for r in xrange(self.count()):
                                 databytes += self.length(r)
                             dd = 0
                             di = actual - self.count() * s - databytes
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)