upstream/mercurial-mirror Commit - r2642:6414ee2e

1

"""

1

"""

2

revlog.py - storage back-end for mercurial

2

revlog.py - storage back-end for mercurial

3

4

This provides efficient delta storage with O(1) retrieve and append

4

This provides efficient delta storage with O(1) retrieve and append

5

and O(changes) merge between branches

5

and O(changes) merge between branches

6

7

8

9

This software may be used and distributed according to the terms

9

This software may be used and distributed according to the terms

10

of the GNU General Public License, incorporated herein by reference.

10

of the GNU General Public License, incorporated herein by reference.

11

"""

11

"""

12

13

from node import *

13

from node import *

14

from i18n import gettext as _

14

from i18n import gettext as _

15

from demandload import demandload

15

from demandload import demandload

16

demandload(globals(), "binascii changegroup errno heapq mdiff os")

16

demandload(globals(), "binascii changegroup errno heapq mdiff os")

17

demandload(globals(), "sha struct util zlib")

17

demandload(globals(), "sha struct util zlib")

18

19

# revlog format numbers; load() reads them from the low 16 bits of the
# version word
REVLOGV0 = 0
REVLOGNG = 1

# revlog flags; load() reads them from the bits above the low 16
REVLOGNGINLINEDATA = 1 << 16
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

# default version word (format ORed with flags), used as the default
# value of revlog's defversion argument
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

29

30

def flagstr(flag):
    """Translate a revlog flag name into its bit value.

    "inline" maps to REVLOGNGINLINEDATA; any other name raises
    RevlogError.
    """
    if flag == "inline":
        return REVLOGNGINLINEDATA
    # interpolate after the catalog lookup: formatting inside _() would
    # make the msgid vary with the flag and never match a translation
    raise RevlogError(_("unknown revlog flag %s") % flag)

34

35

def hash(text, p1, p2):
    """compute the node hash of a text given its two parent hashes

    The digest is taken over the smaller parent, then the larger
    parent, then the text, so swapping p1 and p2 gives the same
    result.  Because the parents are mixed in, revisions with the same
    content but different history get different hashes.
    """
    first, second = min(p1, p2), max(p1, p2)
    digest = sha.new(first)
    digest.update(second)
    digest.update(text)
    return digest.digest()

48

49

def compress(text):
    """ return a (header, data) pair holding text, compressed if useful

    Texts of 44 characters or more are run through zlib and the result
    is kept, with an empty header, unless it is longer than the input.
    Everything else is stored literally: behind a 'u' header, or with
    an empty header when the text is empty or starts with a NUL.
    """
    if not text:
        return ("", text)
    if len(text) >= 44:
        packed = zlib.compress(text)
        if len(packed) <= len(text):
            return ("", packed)
    # literal storage; a leading NUL already marks the data as literal
    if text[0] == '\0':
        return ("", text)
    return ('u', text)

60

61

def decompress(bin):
    """ undo compress(): return the text stored in bin

    The first character selects the encoding: NUL means the data is
    literal and returned whole, 'x' means zlib data, 'u' means literal
    data after the marker.  Empty input is returned unchanged; any
    other first character raises RevlogError.
    """
    if not bin:
        return bin
    marker = bin[0]
    if marker == '\0':
        return bin
    elif marker == 'x':
        return zlib.decompress(bin)
    elif marker == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % marker)

69

70

# index v0: four 32-bit ints followed by three 20-byte hashes; the
# record's own nodeid is the last hash, 56 bytes into the record
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

# index ng:
#  6 bytes offset   \ packed together
#  2 bytes flags    / in one 8-byte field
#  4 bytes compressed length
#  4 bytes uncompressed length
#  4 bytes: base rev
#  4 bytes link rev
#  4 bytes parent 1 rev
#  4 bytes parent 2 rev
# 32 bytes: nodeid (20-byte hash plus 12 pad bytes)
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32

# load() unpacks the first 4 bytes of an index file with this format
# to get the version word
versionformat = ">i"

85

86

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices

    Raw index records are read from the open index file only when
    they are asked for.  self.index holds one slot per record (None
    until loaded) and self.map maps nodeids to revision numbers.
    """

    # lazyparser is not safe to use on windows if the win32 extensions
    # are not available. it keeps the file handle open, which makes it
    # impossible to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size, indexformat, shaoffset):
        """
        dataf is the open index file, size its length in bytes,
        indexformat the struct format of one record and shaoffset the
        byte position of the nodeid inside a record.
        """
        recsize = struct.calcsize(indexformat)
        nrecs = size / recsize
        self.dataf = dataf
        self.format = indexformat
        self.indexformat = indexformat
        self.shaoffset = shaoffset
        self.datasize = size
        self.s = recsize            # bytes per index record
        self.l = nrecs              # number of records in the file
        self.index = [None] * nrecs
        self.map = {nullid: -1}
        self.allmap = 0             # set once the map may be complete
        self.all = 0                # set once every record is loaded
        self.mapfind_count = 0      # findnode() calls so far

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate.  This requires loading the entire index,
        which is fairly slow.  loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        self.allmap = 1
        recsize = self.s
        shaoff = self.shaoffset
        total = self.l
        blocksize = recsize * 256
        rev = 0
        pos = 0
        self.dataf.seek(0)
        while pos < self.datasize:
            # read 256 records at a time and pull out just the nodeids
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                self.map[data[off + shaoff:off + shaoff + 20]] = rev
                rev += 1
                if rev >= total:
                    break
                off += recsize
            pos += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        """
        store the raw records found in blocksize bytes starting at
        byte blockstart, and map their nodeids.  The bytes are read
        from the file unless the caller passes them in as data.
        Slots that are already filled are left alone.
        """
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            data = self.dataf.read(blocksize)
        recsize = self.s
        first = blockstart / recsize
        for x in xrange(len(data) / recsize):
            if self.index[first + x] is None:
                off = x * recsize
                rec = data[off:off + recsize]
                self.index[first + x] = rec
                self.map[rec[self.shaoffset:self.shaoffset + 20]] = first + x

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes.  After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1

        # step back over the records at the tail that are already loaded
        last = self.l - 1
        while self.index[last] is not None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1

        recsize = self.s
        shaoff = self.shaoffset
        blocksize = recsize * 256
        end = (last + 1) * recsize
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            hit = data.rfind(node, 0, end - start)
            while hit >= 0:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                rec = hit / recsize
                base = rec * recsize + shaoff
                found = data[base:base + 20]
                if found == node:
                    self.map[found] = rec + start / recsize
                    return node
                hit = data.rfind(node, 0, hit)
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        """
        load raw index records.  With no arguments the whole file is
        loaded; with i and end, records i up to end; with i alone, a
        block of 64 records around record i.
        """
        if self.all:
            return
        recsize = self.s
        whole = False
        if i is None:
            whole = True
            blockstart = 0
            blocksize = (512 / recsize) * recsize
            end = self.datasize
        elif end:
            blockstart = i * recsize
            end = end * recsize
            blocksize = end - blockstart
        else:
            # clearing bit 5 moves the start back by at most 32
            # records, so the 64-record block always contains i
            blockstart = (i & ~(32)) * recsize
            blocksize = recsize * 64
            end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if whole:
            self.all = True

219

220

class lazyindex(object):
    """a list-like view of the index that loads entries on first use"""

    def __init__(self, parser):
        # the parser owns the list of entries; this object wraps it
        self.p = parser

    def __len__(self):
        return len(self.p.index)

    def load(self, pos):
        """have the parser load the entry at pos and return it"""
        if pos < 0:
            # the parser is given a non-negative position
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]

    def __getitem__(self, pos):
        entry = self.p.index[pos]
        if not entry:
            entry = self.load(pos)
        if isinstance(entry, str):
            # still a raw record, unpack it into a tuple
            entry = struct.unpack(self.p.indexformat, entry)
        return entry

    def __setitem__(self, pos, item):
        self.p.index[pos] = item

    def __delitem__(self, pos):
        del self.p.index[pos]

    def append(self, e):
        self.p.index.append(e)

242

243

class lazymap(object):
    """a dict-like view of the node map that fills itself in on demand"""

    def __init__(self, parser):
        # the parser owns the map; this object wraps it
        self.p = parser

    def load(self, key):
        """have the parser look for key; KeyError if it finds nothing"""
        if self.p.findnode(key) is None:
            raise KeyError(key)

    def __contains__(self, key):
        if key not in self.p.map:
            # not seen yet: load the whole map before answering
            self.p.loadmap()
        return key in self.p.map

    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            entry = self.p.index[i]
            if not entry:
                self.p.loadindex(i)
                entry = self.p.index[i]
            if isinstance(entry, str):
                entry = struct.unpack(self.p.indexformat, entry)
            # the nodeid is the last field of an index entry
            yield entry[-1]

    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            pass
        # unknown so far: search the index file, then look again
        try:
            self.load(key)
            return self.p.map[key]
        except KeyError:
            raise KeyError("node " + hex(key))

    def __setitem__(self, key, val):
        self.p.map[key] = val

    def __delitem__(self, key):
        del self.p.map[key]

279

280

class RevlogError(Exception):
    """error raised by this module, e.g. for an unknown flag or node"""

281

282

class revlog(object):

282

class revlog(object):

283

"""

283

"""

284

the underlying revision storage object

284

the underlying revision storage object

285

286

A revlog consists of two parts, an index and the revision data.

286

A revlog consists of two parts, an index and the revision data.

287

288

The index is a file with a fixed record size containing

288

The index is a file with a fixed record size containing

289

information on each revision, including its nodeid (hash), the

289

information on each revision, including its nodeid (hash), the

290

nodeids of its parents, the position and offset of its data within

290

nodeids of its parents, the position and offset of its data within

291

the data file, and the revision it's based on. Finally, each entry

291

the data file, and the revision it's based on. Finally, each entry

292

contains a linkrev entry that can serve as a pointer to external

292

contains a linkrev entry that can serve as a pointer to external

293

data.

293

data.

294

295

The revision data itself is a linear collection of data chunks.

295

The revision data itself is a linear collection of data chunks.

296

Each chunk represents a revision and is usually represented as a

296

Each chunk represents a revision and is usually represented as a

297

delta against the previous chunk. To bound lookup time, runs of

297

delta against the previous chunk. To bound lookup time, runs of

298

deltas are limited to about 2 times the length of the original

298

deltas are limited to about 2 times the length of the original

299

version data. This makes retrieval of a version proportional to

299

version data. This makes retrieval of a version proportional to

300

its size, or O(1) relative to the number of revisions.

300

its size, or O(1) relative to the number of revisions.

301

302

Both pieces of the revlog are written to in an append-only

302

Both pieces of the revlog are written to in an append-only

303

fashion, which means we never need to rewrite a file to insert or

303

fashion, which means we never need to rewrite a file to insert or

304

remove data, and can use some simple techniques to avoid the need

304

remove data, and can use some simple techniques to avoid the need

305

for locking while reading.

305

for locking while reading.

306

"""

306

"""

307

def __init__(self, opener, indexfile, datafile,
             defversion=REVLOG_DEFAULT_VERSION):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.
    indexfile and datafile name the two files of the revlog, and
    defversion is the version word load() falls back on when the
    index file is missing or empty.
    """
    self.opener = opener
    self.indexfile = indexfile
    self.datafile = datafile
    self.defversion = defversion

    # nothing loaded or cached yet
    self.indexstat = None
    self.cache = None
    self.chunkcache = None

    self.load()

324

325

def load(self):

325

def load(self):

326

v = self.defversion

326

v = self.defversion

327

try:

327

try:

328

f = self.opener(self.indexfile)

328

f = self.opener(self.indexfile)

329

i = f.read(4)

329

i = f.read(4)

330

f.seek(0)

330

f.seek(0)

331

except IOError, inst:

331

except IOError, inst:

332

if inst.errno != errno.ENOENT:

332

if inst.errno != errno.ENOENT:

333

raise

333

raise

334

i = ""

334

i = ""

335

else:

335

else:

336

try:

336

try:

337

st = util.fstat(f)

337

st = util.fstat(f)

338

except AttributeError, inst:

338

except AttributeError, inst:

339

st = None

339

st = None

340

else:

340

else:

341

oldst = self.indexstat

341

oldst = self.indexstat

342

if (oldst and st.st_dev == oldst.st_dev

342

if (oldst and st.st_dev == oldst.st_dev

343

and st.st_ino == oldst.st_ino

343

and st.st_ino == oldst.st_ino

344

and st.st_mtime == oldst.st_mtime

344

and st.st_mtime == oldst.st_mtime

345

and st.st_ctime == oldst.st_ctime):

345

and st.st_ctime == oldst.st_ctime):

346

return

346

return

347

self.indexstat = st

347

self.indexstat = st

348

if len(i) > 0:

348

if len(i) > 0:

349

v = struct.unpack(versionformat, i)[0]

349

v = struct.unpack(versionformat, i)[0]

350

flags = v & ~0xFFFF

350

flags = v & ~0xFFFF

351

fmt = v & 0xFFFF

351

fmt = v & 0xFFFF

352

if fmt == REVLOGV0:

352

if fmt == REVLOGV0:

353

if flags:

353

if flags:

354

raise RevlogError(_("index %s invalid flags %x for format v0" %

354

raise RevlogError(_("index %s invalid flags %x for format v0" %

355

(self.indexfile, flags)))

355

(self.indexfile, flags)))

356

elif fmt == REVLOGNG:

356

elif fmt == REVLOGNG:

357

if flags & ~REVLOGNGINLINEDATA:

357

if flags & ~REVLOGNGINLINEDATA:

358

raise RevlogError(_("index %s invalid flags %x for revlogng" %

358

raise RevlogError(_("index %s invalid flags %x for revlogng" %

359

(self.indexfile, flags)))

359

(self.indexfile, flags)))

360

else:

360

else:

361

raise RevlogError(_("index %s invalid format %d" %

361

raise RevlogError(_("index %s invalid format %d" %

362

(self.indexfile, fmt)))

362

(self.indexfile, fmt)))

363

self.version = v

363

self.version = v

364

if v == REVLOGV0:

364

if v == REVLOGV0:

365

self.indexformat = indexformatv0

365

self.indexformat = indexformatv0

366

shaoffset = v0shaoffset

366

shaoffset = v0shaoffset

367

else:

367

else:

368

self.indexformat = indexformatng

368

self.indexformat = indexformatng

369

shaoffset = ngshaoffset

369

shaoffset = ngshaoffset

370

371

if i:

371

if i:

372

if (lazyparser.safe_to_use and not self.inlinedata() and

372

if (lazyparser.safe_to_use and not self.inlinedata() and

373

st and st.st_size > 10000):

373

st and st.st_size > 10000):

374

# big index, let's parse it on demand

374

# big index, let's parse it on demand

375

parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)

375

parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)

376

self.index = lazyindex(parser)

376

self.index = lazyindex(parser)

377

self.nodemap = lazymap(parser)

377

self.nodemap = lazymap(parser)

378

else:

378

else:

379

self.parseindex(f, st)

379

self.parseindex(f, st)

380

if self.version != REVLOGV0:

380

if self.version != REVLOGV0:

381

e = list(self.index[0])

381

e = list(self.index[0])

382

type = self.ngtype(e[0])

382

type = self.ngtype(e[0])

383

e[0] = self.offset_type(0, type)

383

e[0] = self.offset_type(0, type)

384

self.index[0] = e

384

self.index[0] = e

385

else:

385

else:

386

self.nodemap = { nullid: -1}

386

self.nodemap = { nullid: -1}

387

self.index = []

387

self.index = []

388

389

390

def parseindex(self, fp, st):
    """
    read every index entry from fp into self.index and self.nodemap

    st is the stat result for fp, or a false value when none is
    available; in that case the whole file is read in one call.  For
    an inline revlog the data following each entry is skipped, and
    the first block read is kept as self.chunkcache.
    """
    recsize = struct.calcsize(self.indexformat)
    self.index = []
    self.nodemap = {nullid: -1}
    inline = self.inlinedata()
    rev = 0
    leftover = None
    while True:
        if st:
            data = fp.read(65536)
        else:
            # hack for httprangereader, it doesn't do partial reads well
            data = fp.read()
        if not data:
            break
        if rev == 0 and inline:
            # cache the first chunk
            self.chunkcache = (0, data)
        if leftover:
            # finish the record that was cut off by the previous read
            data = leftover + data
            leftover = None
        pos = 0
        size = len(data)
        while pos < size:
            if size - pos < recsize:
                leftover = data[pos:]
                break
            entry = struct.unpack(self.indexformat, data[pos:pos + recsize])
            pos += recsize
            self.index.append(entry)
            self.nodemap[entry[-1]] = rev
            rev += 1
            if inline:
                # step over the revision data stored after the entry
                pos += entry[1]
                if pos > size:
                    # some things don't seek well, just read it
                    fp.read(pos - size)
        if not st:
            break

430

431

432

def ngoffset(self, q):
    """
    return the offset held in the upper bits of q

    Raises RevlogError if any of the low 16 bits of q are set.
    """
    flags = q & 0xFFFF
    if not flags:
        return long(q >> 16)
    raise RevlogError(_('%s: incompatible revision flag %x') %
                      (self.indexfile, q))

437

438

def ngtype(self, q):
    """return the low 16 bits of q as an int"""
    low = q & 0xFFFF
    return int(low)

440

441

def offset_type(self, offset, type):
    """pack offset (shifted up by 16 bits) and type into one long"""
    shifted = long(offset) << 16
    return long(shifted | type)

443

444

def loadindex(self, start, end):
    """have the lazy parser, if one is in use, load entries start..end"""
    if not isinstance(self.index, lazyindex):
        return
    self.index.p.loadindex(start, end)

448

449

def loadindexmap(self):
    """load the whole index through the lazy parser and adopt its map"""
    if not isinstance(self.index, lazyindex):
        return
    parser = self.index.p
    parser.loadindex()
    # use the parser's plain dict as the node map from now on
    self.nodemap = parser.map

455

456

def loadmap(self):
    """load the whole node map through the lazy parser and adopt it"""
    if not isinstance(self.nodemap, lazymap):
        return
    parser = self.nodemap.p
    parser.loadmap()
    # use the parser's plain dict as the node map from now on
    self.nodemap = parser.map

461

462

def inlinedata(self):
    """return the inline-data flag bit of self.version (0 if unset)"""
    return self.version & REVLOGNGINLINEDATA

463

def tip(self):
    """return the node of the last entry in the index"""
    last = len(self.index) - 1
    return self.node(last)

464

def count(self):
    """return the number of entries in the index"""
    return len(self.index)

465

def node(self, rev):
    """return the nodeid of revision rev; nullid for a negative rev"""
    if rev < 0:
        return nullid
    # the nodeid is the last field of an index entry
    return self.index[rev][-1]

467

def rev(self, node):
    """return the revision number of node; RevlogError if unknown"""
    try:
        found = self.nodemap[node]
    except KeyError:
        raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
    return found

472

def linkrev(self, node):
    """return the linkrev stored for node; -1 for nullid"""
    if node == nullid:
        return -1
    # the linkrev is the fourth field from the end of an index entry
    return self.index[self.rev(node)][-4]

473

def parents(self, node):
    """
    return the two parent nodes of node

    nullid has the parents (nullid, nullid).  A v0 index stores the
    parents as nodes, so they are returned as stored; otherwise the
    stored revision numbers are converted to nodes.
    """
    if node == nullid:
        return (nullid, nullid)
    entry = self.index[self.rev(node)]
    stored = entry[-3:-1]
    if self.version == REVLOGV0:
        return stored
    return [self.node(r) for r in stored]

480

def parentrevs(self, rev):
    """
    return the revision numbers of the two parents of rev

    Revision -1 has the parents (-1, -1).  A v0 index stores the
    parents as nodes, so they are converted to revision numbers;
    otherwise the stored revision numbers are returned as they are.
    """
    if rev == -1:
        return (-1, -1)
    stored = self.index[rev][-3:-1]
    if self.version != REVLOGV0:
        return stored
    return [self.rev(n) for n in stored]

487

def start(self, rev):
    """return the stored data offset of rev; -1 for a negative rev"""
    if rev < 0:
        return -1
    first = self.index[rev][0]
    if self.version == REVLOGV0:
        return first
    # outside v0 the first field also carries flag bits
    return self.ngoffset(first)

493

494

def end(self, rev):
    """return start(rev) plus length(rev)"""
    begin = self.start(rev)
    return begin + self.length(rev)

495

496

def size(self, rev):
    """return the length of the uncompressed text for a given revision"""
    stored = -1
    if self.version != REVLOGV0:
        # outside v0 the index entry records the uncompressed length
        stored = self.index[rev][2]
    if stored >= 0:
        return stored

    # otherwise rebuild the text and measure it
    return len(self.revision(self.node(rev)))

    # alternate implementation, The advantage to this code is it
    # will be faster for a single revision.  But, the results are not
    # cached, so finding the size of every revision will be slower.
    #
    # if self.cache and self.cache[1] == rev:
    #     return len(self.cache[2])
    #
    # base = self.base(rev)
    # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
    #     base = self.cache[1]
    #     text = self.cache[2]
    # else:
    #     text = self.revision(self.node(base))
    #
    # l = len(text)
    # for x in xrange(base + 1, rev + 1):
    #     l = mdiff.patchedsize(l, self.chunk(x))
    # return l

526

527

def length(self, rev):
    """return the stored length of rev's data; 0 for a negative rev"""
    if rev < 0:
        return 0
    return self.index[rev][1]

532

def base(self, rev):
    """return the base revision of rev; a negative rev is its own base"""
    if rev < 0:
        return rev
    # the base rev is the fifth field from the end of an index entry
    return self.index[rev][-5]

533

534

def reachable(self, rev, stop=None):
    """
    return a dict whose keys are the nodes reachable from node rev

    The walk follows parents breadth-first, starting at rev (which,
    despite its name, is used as a node).  It does not go past stop
    or nullid, and skips parents whose revision number is lower than
    that of stop.
    """
    seen = {rev: 1}
    queue = [rev]
    stopn = 0
    if stop:
        stopn = self.rev(stop)
    while queue:
        n = queue.pop(0)
        if n == stop or n == nullid:
            continue
        for p in self.parents(n):
            if self.rev(p) >= stopn and p not in seen:
                seen[p] = 1
                queue.append(p)
    return seen

555

556

def nodesbetween(self, roots=None, heads=None):
    """Return a tuple containing three elements. Elements 1 and 2 contain
    the final lists of roots and heads after all the unreachable ones
    have been pruned.  Element 0 contains a topologically sorted list of
    all nodes that satisfy these constraints:

    1. All nodes must be descended from a node in roots (the nodes on
       roots are considered descended from themselves).
    2. All nodes must also be ancestors of a node in heads (the nodes in
       heads are considered to be their own ancestors).

    If roots is unspecified, nullid is assumed as the only root.
    If heads is unspecified, it is taken to be the output of the
    heads method (i.e. a list of all nodes in the repository that
    have no children).

    Three empty lists are returned when roots or heads is given but
    empty, or when pruning leaves no ancestors or no roots."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [nullid] # Everybody's a descendent of nullid
        lowestrev = -1
    if (lowestrev == -1) and (heads is None):
        # We want _all_ the nodes!
        return ([self.node(r) for r in xrange(0, self.count())],
                [nullid], list(self.heads()))
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = self.count() - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = {}
        # Start at the top and keep marking parents until we're done.
        nodestotag = heads[:]
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.  A value of 0 means "not reached from roots yet".
        heads = dict.fromkeys(heads, 0)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendent of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors[n] = 1 # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.extend([p for p in self.parents(n) if
                                       p != nullid])
                elif n in heads: # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > -1:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [n for n in roots if n in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(n) for n in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = -1
            roots = [nullid]
    # Transform our roots list into a 'set' (i.e. a dictionary where the
    # values don't matter).
    descendents = dict.fromkeys(roots, 1)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendents.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendents, empty parents will look like
    # they're descendents.
    for r in xrange(max(lowestrev, 0), highestrev + 1):
        n = self.node(r)
        isdescendent = False
        if lowestrev == -1: # Everybody is a descendent of nullid
            isdescendent = True
        elif n in descendents:
            # n is already a descendent
            isdescendent = True
            # This check only needs to be done here because all the roots
            # were already marked as descendents before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendents, it's not a root.
                if (p[0] in descendents) or (p[1] in descendents):
                    roots.pop(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendent if either of its parents are
            # descendents.  (We seeded the descendents dictionary with the
            # roots up there, remember?)
            if (p[0] in descendents) or (p[1] in descendents):
                descendents[n] = 1
                isdescendent = True
        if isdescendent and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendents and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = 1
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = 1
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # Keep only the heads that were actually reached (value set to 1 above).
    heads = [n for n in heads.iterkeys() if heads[n] != 0]
    roots = roots.keys()
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)

707

def heads(self, start=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned

    """
    if start is None:
        start = nullid
    first = self.rev(start)
    # seen: revisions known to descend from start (start included)
    # tips: the subset of seen that has no child so far
    seen = {first: 1}
    tips = {first: 1}

    getparents = self.parentrevs
    for r in xrange(first + 1, self.count()):
        for p in getparents(r):
            if p in seen:
                seen[r] = tips[r] = 1
            # a revision with a child can no longer be a head
            tips.pop(p, None)
    return [self.node(r) for r in tips]

730

def children(self, node):
    """find the children of a given node

    Returns the nodes, in revision order, that have node as a parent.
    Each child is listed once, even when both of its parent slots name
    the same node.  For nullid (revision -1) only revisions with no
    real parent at all are returned; a revision whose second parent is
    merely unset is not a child of nullid.
    """
    found = []
    noderev = self.rev(node)
    parentrevs = self.parentrevs
    for r in range(noderev + 1, self.count()):
        # -1 is the revision number used for nullid in this file, i.e.
        # an unset parent slot; only real parents are considered here.
        realparents = [pr for pr in parentrevs(r) if pr != -1]
        if realparents:
            if noderev in realparents:
                found.append(self.node(r))
        elif noderev == -1:
            # no real parent at all: r is a root, hence a child of nullid
            found.append(self.node(r))
    return found

744

def lookup(self, id):
    """locate a node based on revision number or subset of hex nodeid

    Tried in order: id is already a key of self.nodemap; id is an
    integer revision number; id is the decimal string of a revision
    number (negative values count back from self.count()); id is a
    prefix of exactly one node's hex form.  RevlogError is raised when
    the prefix matches several nodes or none.
    """
    if id in self.nodemap:
        return id
    if type(id) == type(0):
        return self.node(id)
    try:
        rev = int(id)
        if str(rev) != id:
            # reject spellings such as "007" or " 7"
            raise ValueError
        if rev < 0:
            rev += self.count()
        if rev < 0 or rev >= self.count():
            raise ValueError
        return self.node(rev)
    except (ValueError, OverflowError):
        pass

    # not a usable revision number: treat id as a hex nodeid prefix
    matches = [n for n in self.nodemap if hex(n).startswith(id)]
    if len(matches) > 1:
        raise RevlogError(_("Ambiguous identifier"))
    if len(matches) < 1:
        raise RevlogError(_("No match found"))
    return matches[0]

767

def diff(self, a, b):
    """return the delta mdiff.textdiff computes for texts a and b"""
    delta = mdiff.textdiff(a, b)
    return delta

771

def patches(self, t, pl):
    """apply the list of patches pl to the string t via mdiff.patches"""
    patched = mdiff.patches(t, pl)
    return patched

775

def chunk(self, rev, df=None, cachelen=4096):
    """return the decompressed stored chunk for revision rev

    rev      - revision number of the chunk to fetch
    df       - optional already-open file object to read from; when it
               is false the index file (inline revlogs) or the data
               file is opened on demand
    cachelen - minimum number of bytes read whenever the chunk cache
               has to be (re)filled

    self.chunkcache holds (file offset, raw bytes) for the last read
    and is reused when the requested chunk lies entirely within it.
    """
    start = self.start(rev)
    length = self.length(rev)
    inline = self.inlinedata()
    if inline:
        # in an inline revlog the data shares the file with the index
        # records, so skip the rev + 1 records that precede this chunk
        start += (rev + 1) * struct.calcsize(self.indexformat)
    end = start + length

    def loadcache(df):
        # refill the cache beginning at this chunk, reading at least
        # cachelen bytes (more if the chunk itself is larger)
        if not df:
            if inline:
                name = self.indexfile
            else:
                name = self.datafile
            df = self.opener(name)
        df.seek(start)
        self.chunkcache = (start, df.read(max(cachelen, length)))

    if not self.chunkcache:
        loadcache(df)

    offset = 0
    cache_start = self.chunkcache[0]
    cache_end = cache_start + len(self.chunkcache[1])
    if cache_start <= start and end <= cache_end:
        # the whole chunk is already cached
        offset = start - cache_start
    else:
        # miss: after the reload the cache begins exactly at start
        loadcache(df)

    return decompress(self.chunkcache[1][offset:offset + length])

810

def delta(self, node):
    """return or calculate a delta between a node and its predecessor

    The predecessor is the revision numbered one below node's own
    revision; the work is delegated to revdiff.
    """
    rev = self.rev(node)
    prev = rev - 1
    return self.revdiff(prev, rev)

815

def revdiff(self, rev1, rev2):
    """return or calculate a delta between two revisions

    When rev2 directly follows rev1 and both share the same base, the
    stored chunk of rev2 is returned as is.  Otherwise both revisions
    are reconstructed and diffed.
    """
    base1 = self.base(rev1)
    base2 = self.base(rev2)
    if base1 != base2 or rev1 + 1 != rev2:
        old = self.revision(self.node(rev1))
        new = self.revision(self.node(rev2))
        return self.diff(old, new)
    return self.chunk(rev2)

825

def revision(self, node):
    """return the uncompressed text of the revision identified by node

    nullid yields the empty string.  self.cache, a (node, rev, text)
    triple for the last text produced, is returned directly on a hit
    and otherwise used as a starting point when its revision lies
    between the base and the requested revision.  The rebuilt text is
    checked against node with hash(); RevlogError is raised on a
    mismatch.
    """
    if node == nullid:
        return ""
    if self.cache and self.cache[0] == node:
        return self.cache[2]

    # look up what we need to read
    rev = self.rev(node)
    base = self.base(rev)

    # for inline revlogs we probably have the whole chunk cached, so no
    # file is opened up front; chunk() opens one itself when needed
    df = None
    if not self.inlinedata():
        df = self.opener(self.datafile)

    # do we have useful data cached?
    text = None
    fromcache = self.cache and base <= self.cache[1] < rev
    if fromcache:
        base = self.cache[1]
        text = self.cache[2]
    self.loadindex(base, rev + 1)
    if not fromcache:
        text = self.chunk(base, df=df)

    # collect the deltas that lead from base to rev and apply them
    bins = [self.chunk(r, df=df) for r in xrange(base + 1, rev + 1)]
    text = self.patches(text, bins)

    p1, p2 = self.parents(node)
    if node != hash(text, p1, p2):
        raise RevlogError(_("integrity check failed on %s:%d")
                          % (self.datafile, rev))

    self.cache = (node, rev, text)
    return text

864

def checkinlinesize(self, tr, fp=None):
    """split an inline revlog into index and data files once it is large

    tr - the transaction; it must already know about self.indexfile
    fp - optional open file object for the index file, assumed to be
         positioned at its end; when it is false the index file is
         opened here and positioned at its end

    Nothing happens for non-inline revlogs or while the index file is
    smaller than 131072 bytes (128 KiB).  Otherwise every revision's
    data is copied into self.datafile, the index is rewritten without
    the inline flag, and the transaction is updated.  fp is closed
    during the conversion, even when the caller supplied it.

    Raises RevlogError when the transaction has no entry for
    self.indexfile.
    """
    if not self.inlinedata():
        return
    if not fp:
        fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
    size = fp.tell()
    if size < 131072:
        return
    trinfo = tr.find(self.indexfile)
    if trinfo is None:
        # format after the translation lookup: formatting first would
        # hand gettext a string that is never in the message catalog
        raise RevlogError(_("%s not found in the transaction")
                          % self.indexfile)

    trindex = trinfo[2]
    dataoff = self.start(trindex)

    tr.add(self.datafile, dataoff)
    df = self.opener(self.datafile, 'w')
    calc = struct.calcsize(self.indexformat)
    for r in xrange(self.count()):
        # inline layout: the chunk for r sits after r + 1 index records
        start = self.start(r) + (r + 1) * calc
        length = self.length(r)
        fp.seek(start)
        d = fp.read(length)
        df.write(d)
    fp.close()
    df.close()
    fp = self.opener(self.indexfile, 'w', atomictemp=True)
    self.version &= ~(REVLOGNGINLINEDATA)
    if self.count():
        # the packed version header is written in place of the first
        # four bytes of the first index entry
        x = self.index[0]
        e = struct.pack(self.indexformat, *x)[4:]
        l = struct.pack(versionformat, self.version)
        fp.write(l)
        fp.write(e)

    for i in xrange(1, self.count()):
        x = self.index[i]
        e = struct.pack(self.indexformat, *x)
        fp.write(e)

    # if we don't call rename, the temp file will never replace the
    # real index
    fp.rename()

    tr.replace(self.indexfile, trindex * calc)
    self.chunkcache = None

913

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    d - an optional precomputed delta

    p1 defaults to the current tip and p2 to nullid.  Returns the node
    id of the revision; if that node is already present nothing is
    written.
    """
    if text is None: text = ""
    if p1 is None: p1 = self.tip()
    if p2 is None: p2 = nullid

    node = hash(text, p1, p2)

    # already stored: nothing to do
    if node in self.nodemap:
        return node

    n = self.count()  # revision number the new entry will get
    t = n - 1         # current tip revision (-1 when the log is empty)

    if n:
        base = self.base(t)
        start = self.start(base)
        end = self.end(t)
        if not d:
            # no usable precomputed delta: diff against the tip's text
            prev = self.revision(self.tip())
            d = self.diff(prev, str(text))
        data = compress(d)
        # compress() returns a pair; both parts are written to disk
        l = len(data[1]) + len(data[0])
        # bytes spanned from the start of the current base through the
        # end of the would-be new delta
        dist = end - start + l

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if not n or dist > len(text) * 2:
        data = compress(text)
        l = len(data[1]) + len(data[0])
        base = n
    else:
        base = self.base(t)

    # the new chunk starts where the tip's chunk ends
    offset = 0
    if t >= 0:
        offset = self.end(t)

    if self.version == REVLOGV0:
        e = (offset, l, base, link, p1, p2, node)
    else:
        e = (self.offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)

    self.index.append(e)
    self.nodemap[node] = n
    entry = struct.pack(self.indexformat, *e)

    if not self.inlinedata():
        # separate files: append the data first, then reopen the index
        transaction.add(self.datafile, offset)
        transaction.add(self.indexfile, n * len(entry))
        f = self.opener(self.datafile, "a")
        if data[0]:
            f.write(data[0])
        f.write(data[1])
        f.close()
        f = self.opener(self.indexfile, "a")
    else:
        # inline: index entry and data both go into the index file
        f = self.opener(self.indexfile, "a+")
        f.seek(0, 2)
        transaction.add(self.indexfile, f.tell(), self.count() - 1)

    if len(self.index) == 1 and self.version != REVLOGV0:
        # first entry of a non-V0 revlog: the packed version is written
        # in place of the entry's first four bytes
        # (presumably versionformat packs to exactly 4 bytes - TODO confirm)
        l = struct.pack(versionformat, self.version)
        f.write(l)
        entry = entry[4:]

    f.write(entry)

    if self.inlinedata():
        f.write(data[0])
        f.write(data[1])
        # may convert the revlog to separate index and data files
        self.checkinlinesize(transaction, f)

    self.cache = (node, n, text)
    return node

997

def ancestor(self, a, b):
    """calculate the least common ancestor of nodes a and b

    a and b are node ids.  Returns a node id: a itself when a == b, the
    lower-numbered of the two when it is reachable from the other, and
    otherwise a common ancestor found by walking both ancestries in
    order of decreasing distance from the root.
    """

    # start with some short cuts for the linear cases
    if a == b:
        return a
    ra = self.rev(a)
    rb = self.rev(b)
    # first is the node with the lower revision number, last the other
    if ra < rb:
        last = b
        first = a
    else:
        last = a
        first = b

    # reachable won't include stop in the list, so we have to use a parent
    reachable = self.reachable(last, stop=self.parents(first)[0])
    if first in reachable:
        return first

    # calculate the distance of every node from root
    # (length of the longest parent chain down to nullid)
    dist = {nullid: 0}
    for i in xrange(self.count()):
        n = self.node(i)
        p1, p2 = self.parents(n)
        dist[n] = max(dist[p1], dist[p2]) + 1

    # traverse ancestors in order of decreasing distance from root
    def ancestors(node):
        # yields (distance, node) pairs, each node at most once
        # we store negative distances because heap returns smallest member
        h = [(-dist[node], node)]
        seen = {}
        while h:
            d, n = heapq.heappop(h)
            if n not in seen:
                seen[n] = 1
                yield (-d, n)
                for p in self.parents(n):
                    heapq.heappush(h, (-dist[p], p))

    def generations(node):
        # group the output of ancestors() by distance: yields
        # (distance, {node: 1, ...}) for each run of equal distances
        sg, s = None, {}
        for g,n in ancestors(node):
            if g != sg:
                # nothing to flush before the first generation, when
                # sg is still None
                if sg:
                    yield sg, s
                sg, s = g, {n:1}
            else:
                s[n] = 1
        yield sg, s

    x = generations(a)
    y = generations(b)
    gx = x.next()
    gy = y.next()

    # increment each ancestor list until it is closer to root than
    # the other, or they match
    while 1:
        if gx[0] == gy[0]:
            # find the intersection
            i = [ n for n in gx[1] if n in gy[1] ]
            if i:
                return i[0]
            else:
                # same distance but nothing shared: advance both sides
                gy = y.next()
                gx = x.next()
        elif gx[0] < gy[0]:
            # y is further from the root: advance y
            gy = y.next()
        else:
            # x is further from the root: advance x
            gx = x.next()

1073

def group(self, nodelist, lookup, infocollect=None):

1074

def group(self, nodelist, lookup, infocollect=None):

1074

"""calculate a delta group

1075

"""calculate a delta group

1075

1076

Given a list of changeset revs, return a set of deltas and

1077

Given a list of changeset revs, return a set of deltas and

1077

metadata corresponding to nodes. the first delta is

1078

metadata corresponding to nodes. the first delta is

1078

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

1079

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

1079

have this parent as it has all history before these

1080

have this parent as it has all history before these

1080

changesets. parent is parent[0]

1081

changesets. parent is parent[0]

1081

"""

1082

"""

1082

revs = [self.rev(n) for n in nodelist]

1083

revs = [self.rev(n) for n in nodelist]

1083

1084

# if we don't have any revisions touched by these changesets, bail

1085

# if we don't have any revisions touched by these changesets, bail

1085

if not revs:

1086

if not revs:

1086

yield changegroup.closechunk()

1087

yield changegroup.closechunk()

1087

return

1088

return

1088

1089

# add the parent of the first rev

1090

# add the parent of the first rev

1090

p = self.parents(self.node(revs[0]))[0]

1091

p = self.parents(self.node(revs[0]))[0]

1091

revs.insert(0, self.rev(p))

1092

revs.insert(0, self.rev(p))

1092

1093

# build deltas

1094

# build deltas

1094

for d in xrange(0, len(revs) - 1):

1095

for d in xrange(0, len(revs) - 1):

1095

a, b = revs[d], revs[d + 1]

1096

a, b = revs[d], revs[d + 1]

1096

nb = self.node(b)

1097

nb = self.node(b)

1097

1098

if infocollect is not None:

1099

if infocollect is not None:

1099

infocollect(nb)

1100

infocollect(nb)

1100

1101

d = self.revdiff(a, b)

1102

d = self.revdiff(a, b)

1102

p = self.parents(nb)

1103

p = self.parents(nb)

1103

meta = nb + p[0] + p[1] + lookup(nb)

1104

meta = nb + p[0] + p[1] + lookup(nb)

1104

yield changegroup.genchunk("%s%s" % (meta, d))

1105

yield changegroup.genchunk("%s%s" % (meta, d))

1105

1106

yield changegroup.closechunk()

1107

yield changegroup.closechunk()

1107

1108

def addgroup(self, revs, linkmapper, transaction, unique=0):

1109

def addgroup(self, revs, linkmapper, transaction, unique=0):

1109

"""

1110

"""

1110

add a delta group

1111

add a delta group

1111

1112

given a set of deltas, add them to the revision log. the

1113

given a set of deltas, add them to the revision log. the

1113

first delta is against its parent, which should be in our

1114

first delta is against its parent, which should be in our

1114

log, the rest are against the previous delta.

1115

log, the rest are against the previous delta.

1115

"""

1116

"""

1116

1117

#track the base of the current delta log

1118

#track the base of the current delta log

1118

r = self.count()

1119

r = self.count()

1119

t = r - 1

1120

t = r - 1

1120

node = None

1121

node = None

1121

1122

base = prev = -1

1123

base = prev = -1

1123

start = end = textlen = 0

1124

start = end = textlen = 0

1124

if r:

1125

if r:

1125

end = self.end(t)

1126

end = self.end(t)

1126

1127

ifh = self.opener(self.indexfile, "a+")

1128

ifh = self.opener(self.indexfile, "a+")

1128

ifh.seek(0, 2)

1129

ifh.seek(0, 2)

1129

transaction.add(self.indexfile, ifh.tell(), self.count())

1130

transaction.add(self.indexfile, ifh.tell(), self.count())

1130

if self.inlinedata():

1131

if self.inlinedata():

1131

dfh = None

1132

dfh = None

1132

else:

1133

else:

1133

transaction.add(self.datafile, end)

1134

transaction.add(self.datafile, end)

1134

dfh = self.opener(self.datafile, "a")

1135

dfh = self.opener(self.datafile, "a")

1135

1136

# loop through our set of deltas

1137

# loop through our set of deltas

1137

chain = None

1138

chain = None

1138

for chunk in revs:

1139

for chunk in revs:

1139

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

1140

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

1140

link = linkmapper(cs)

1141

link = linkmapper(cs)

1141

if node in self.nodemap:

1142

if node in self.nodemap:

1142

# this can happen if two branches make the same change

1143

# this can happen if two branches make the same change

1143

# if unique:

1144

# if unique:

1144

# raise RevlogError(_("already have %s") % hex(node[:4]))

1145

# raise RevlogError(_("already have %s") % hex(node[:4]))

1145

chain = node

1146

chain = node

1146

continue

1147

continue

1147

delta = chunk[80:]

1148

delta = chunk[80:]

1148

1149

for p in (p1, p2):

1150

for p in (p1, p2):

1150

if not p in self.nodemap:

1151

if not p in self.nodemap:

1151

raise RevlogError(_("unknown parent %s") % short(p))

1152

raise RevlogError(_("unknown parent %s") % short(p))

1152

1153

if not chain:

1154

if not chain:

1154

# retrieve the parent revision of the delta chain

1155

# retrieve the parent revision of the delta chain

1155

chain = p1

1156

chain = p1

1156

if not chain in self.nodemap:

1157

if not chain in self.nodemap:

1157

raise RevlogError(_("unknown base %s") % short(chain[:4]))

1158

raise RevlogError(_("unknown base %s") % short(chain[:4]))

1158

1159

# full versions are inserted when the needed deltas become

1160

# full versions are inserted when the needed deltas become

1160

# comparable to the uncompressed text or when the previous

1161

# comparable to the uncompressed text or when the previous

1161

# version is not the one we have a delta against. We use

1162

# version is not the one we have a delta against. We use

1162

# the size of the previous full rev as a proxy for the

1163

# the size of the previous full rev as a proxy for the

1163

# current size.

1164

# current size.

1164

1165

if chain == prev:

1166

if chain == prev:

1166

tempd = compress(delta)

1167

tempd = compress(delta)

1167

cdelta = tempd[0] + tempd[1]

1168

cdelta = tempd[0] + tempd[1]

1168

textlen = mdiff.patchedsize(textlen, delta)

1169

textlen = mdiff.patchedsize(textlen, delta)

1169

1170

if chain != prev or (end - start + len(cdelta)) > textlen * 2:

1171

if chain != prev or (end - start + len(cdelta)) > textlen * 2:

1171

# flush our writes here so we can read it in revision

1172

# flush our writes here so we can read it in revision

1172

if dfh:

1173

if dfh:

1173

dfh.flush()

1174

dfh.flush()

1174

ifh.flush()

1175

ifh.flush()

1175

text = self.revision(chain)

1176

text = self.revision(chain)

1176

text = self.patches(text, [delta])

1177

text = self.patches(text, [delta])

1177

chk = self.addrevision(text, transaction, link, p1, p2)

1178

chk = self.addrevision(text, transaction, link, p1, p2)

1178

if chk != node:

1179

if chk != node:

1179

raise RevlogError(_("consistency error adding group"))

1180

raise RevlogError(_("consistency error adding group"))

1180

textlen = len(text)

1181

textlen = len(text)

1181

else:

1182

else:

1182

if self.version == REVLOGV0:

1183

if self.version == REVLOGV0:

1183

e = (end, len(cdelta), base, link, p1, p2, node)

1184

e = (end, len(cdelta), base, link, p1, p2, node)

1184

else:

1185

else:

1185

e = (self.offset_type(end, 0), len(cdelta), textlen, base,

1186

e = (self.offset_type(end, 0), len(cdelta), textlen, base,

1186

link, self.rev(p1), self.rev(p2), node)

1187

link, self.rev(p1), self.rev(p2), node)

1187

self.index.append(e)

1188

self.index.append(e)

1188

self.nodemap[node] = r

1189

self.nodemap[node] = r

1189

if self.inlinedata():

1190

if self.inlinedata():

1190

ifh.write(struct.pack(self.indexformat, *e))

1191

ifh.write(struct.pack(self.indexformat, *e))

1191

ifh.write(cdelta)

1192

ifh.write(cdelta)

1192

self.checkinlinesize(transaction, ifh)

1193

self.checkinlinesize(transaction, ifh)

1193

if not self.inlinedata():

1194

if not self.inlinedata():

1194

dfh = self.opener(self.datafile, "a")

1195

dfh = self.opener(self.datafile, "a")

1195

ifh = self.opener(self.indexfile, "a")

1196

ifh = self.opener(self.indexfile, "a")

1196

else:

1197

else:

1197

if not dfh:

1198

if not dfh:

1198

# addrevision switched from inline to conventional

1199

# addrevision switched from inline to conventional

1199

# reopen the index

1200

# reopen the index

1200

dfh = self.opener(self.datafile, "a")

1201

dfh = self.opener(self.datafile, "a")

1201

ifh = self.opener(self.indexfile, "a")

1202

ifh = self.opener(self.indexfile, "a")

1202

dfh.write(cdelta)

1203

dfh.write(cdelta)

1203

ifh.write(struct.pack(self.indexformat, *e))

1204

ifh.write(struct.pack(self.indexformat, *e))

1204

1205

t, r, chain, prev = r, r + 1, node, node

1206

t, r, chain, prev = r, r + 1, node, node

1206

base = self.base(t)

1207

base = self.base(t)

1207

start = self.start(base)

1208

start = self.start(base)

1208

end = self.end(t)

1209

end = self.end(t)

1209

1210

return node

1211

return node

1211

1212

def strip(self, rev, minlink):

1213

def strip(self, rev, minlink):

1213

if self.count() == 0 or rev >= self.count():

1214

if self.count() == 0 or rev >= self.count():

1214

return

1215

return

1215

1216

if isinstance(self.index, lazyindex):

1217

if isinstance(self.index, lazyindex):

1217

self.loadindexmap()

1218

self.loadindexmap()

1218

1219

# When stripping away a revision, we need to make sure it

1220

# When stripping away a revision, we need to make sure it

1220

# does not actually belong to an older changeset.

1221

# does not actually belong to an older changeset.

1221

# The minlink parameter defines the oldest revision

1222

# The minlink parameter defines the oldest revision

1222

# we're allowed to strip away.

1223

# we're allowed to strip away.

1223

while minlink > self.index[rev][-4]:

1224

while minlink > self.index[rev][-4]:

1224

rev += 1

1225

rev += 1

1225

if rev >= self.count():

1226

if rev >= self.count():

1226

return

1227

return

1227

1228

# first truncate the files on disk

1229

# first truncate the files on disk

1229

end = self.start(rev)

1230

end = self.start(rev)

1230

if not self.inlinedata():

1231

if not self.inlinedata():

1231

df = self.opener(self.datafile, "a")

1232

df = self.opener(self.datafile, "a")

1232

df.truncate(end)

1233

df.truncate(end)

1233

end = rev * struct.calcsize(self.indexformat)

1234

end = rev * struct.calcsize(self.indexformat)

1234

else:

1235

else:

1235

end += rev * struct.calcsize(self.indexformat)

1236

end += rev * struct.calcsize(self.indexformat)

1236

1237

indexf = self.opener(self.indexfile, "a")

1238

indexf = self.opener(self.indexfile, "a")

1238

indexf.truncate(end)

1239

indexf.truncate(end)

1239

1240

# then reset internal state in memory to forget those revisions

1241

# then reset internal state in memory to forget those revisions

1241

self.cache = None

1242

self.cache = None

1242

self.chunkcache = None

1243

self.chunkcache = None

1243

for x in xrange(rev, self.count()):

1244

for x in xrange(rev, self.count()):

1244

del self.nodemap[self.node(x)]

1245

del self.nodemap[self.node(x)]

1245

1246

del self.index[rev:]

1247

del self.index[rev:]

1247

1248

def checksize(self):

1249

def checksize(self):

1249

expected = 0

1250

expected = 0

1250

if self.count():

1251

if self.count():

1251

expected = self.end(self.count() - 1)

1252

expected = self.end(self.count() - 1)

1252

1253

try:

1254

try:

1254

f = self.opener(self.datafile)

1255

f = self.opener(self.datafile)

1255

f.seek(0, 2)

1256

f.seek(0, 2)

1256

actual = f.tell()

1257

actual = f.tell()

1257

dd = actual - expected

1258

dd = actual - expected

1258

except IOError, inst:

1259

except IOError, inst:

1259

if inst.errno != errno.ENOENT:

1260

if inst.errno != errno.ENOENT:

1260

raise

1261

raise

1261

dd = 0

1262

dd = 0

1262

1263

try:

1264

try:

1264

f = self.opener(self.indexfile)

1265

f = self.opener(self.indexfile)

1265

f.seek(0, 2)

1266

f.seek(0, 2)

1266

actual = f.tell()

1267

actual = f.tell()

1267

s = struct.calcsize(self.indexformat)

1268

s = struct.calcsize(self.indexformat)

1268

i = actual / s

1269

i = actual / s

1269

di = actual - (i * s)

1270

di = actual - (i * s)

1270

if self.inlinedata():

1271

if self.inlinedata():

1271

databytes = 0

1272

databytes = 0

1272

for r in xrange(self.count()):

1273

for r in xrange(self.count()):

1273

databytes += self.length(r)

1274

databytes += self.length(r)

1274

dd = 0

1275

dd = 0

1275

di = actual - self.count() * s - databytes

1276

di = actual - self.count() * s - databytes

1276

except IOError, inst:

1277

except IOError, inst:

1277

if inst.errno != errno.ENOENT:

1278

if inst.errno != errno.ENOENT:

1278

raise

1279

raise

1279

di = 0

1280

di = 0

1280

1281

return (dd, di)

1282

return (dd, di)

1282

1283

1284

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """
             revlog.py - storage back-end for mercurial
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches
             Copyright 2005 Matt Mackall <mpm@selenic.com>
             This software may be used and distributed according to the terms
             of the GNU General Public License, incorporated herein by reference.
             """
             from node import *
             from i18n import gettext as _
             from demandload import demandload
             demandload(globals(), "binascii changegroup errno heapq mdiff os")
             demandload(globals(), "sha struct util zlib")
             # revlog version strings
             REVLOGV0 = 0
             REVLOGNG = 1
             # revlog flags
             REVLOGNGINLINEDATA = (1 << 16)
             REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
             REVLOG_DEFAULT_FORMAT = REVLOGNG
             REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
             def flagstr(flag):
                 """translate a revlog flag name into its numeric flag bit

                 Only "inline" is recognised; it maps to REVLOGNGINLINEDATA.
                 Any other name raises RevlogError.
                 """
                 if flag == "inline":
                     return REVLOGNGINLINEDATA
                 # translate the constant format string first and interpolate
                 # afterwards; interpolating inside _() would look up the already
                 # expanded text, which can never match a catalog entry
                 raise RevlogError(_("unknown revlog flag %s") % flag)
             def hash(text, p1, p2):
                 """compute the node id of a revision

                 The digest covers both parent ids followed by the revision
                 text.  The parents are fed in sorted order, so swapping p1 and
                 p2 yields the same id, and mixing them in lets revisions with
                 identical content but different history get different ids.
                 """
                 # order the two parents the same way list.sort() would
                 if p2 < p1:
                     lo, hi = p2, p1
                 else:
                     lo, hi = p1, p2
                 digest = sha.new(lo)
                 digest.update(hi)
                 digest.update(text)
                 return digest.digest()
             def compress(text):
                 """return a (header, data) pair holding text, compressed if useful

                 Empty text is returned untouched.  Text of 44 bytes or more is
                 run through zlib and the result is used unless it is larger
                 than the input.  Anything stored literally gets a 'u' header,
                 except text that already starts with a NUL byte, which needs
                 no header at all.
                 """
                 if not text:
                     return ("", text)
                 if len(text) >= 44:
                     packed = zlib.compress(text)
                     if len(packed) <= len(text):
                         return ("", packed)
                 # short or incompressible: keep the text as it is
                 if text[0] == '\0':
                     return ("", text)
                 return ('u', text)
             def decompress(bin):
                 """undo compress() on a stored chunk

                 The first byte selects the encoding: 'u' marks literal text
                 that follows the marker, 'x' marks zlib data, and a NUL byte
                 means the chunk is literal text stored without a marker.  An
                 empty chunk is returned as is.  Any other first byte raises
                 RevlogError.
                 """
                 if not bin:
                     return bin
                 marker = bin[0]
                 if marker == 'u':
                     return bin[1:]
                 if marker == 'x':
                     return zlib.decompress(bin)
                 if marker == '\0':
                     return bin
                 raise RevlogError(_("unknown compression type %r") % marker)
             # index v0: big-endian, four 4-byte integers followed by three
             # 20-byte strings.  addgroup builds v0 entries as
             # (offset, compressed length, base rev, link rev, p1, p2, nodeid)
             indexformatv0 = ">4l20s20s20s"
             # byte offset of the nodeid inside a v0 entry: 4 * 4 + 20 + 20
             v0shaoffset = 56
             # index ng:
             # 6 bytes offset
             # 2 bytes flags
             # 4 bytes compressed length
             # 4 bytes uncompressed length
             # 4 bytes: base rev
             # 4 bytes link rev
             # 4 bytes parent 1 rev
             # 4 bytes parent 2 rev
             # 32 bytes: nodeid
             # (offset and flags share the single 8-byte "Q" field; the nodeid
             # is a 20-byte string followed by 12 pad bytes)
             indexformatng = ">Qiiiiii20s12x"
             # byte offset of the nodeid inside an ng entry: 8 + 6 * 4
             ngshaoffset = 32
             # format of the version word read from the first 4 bytes of the index
             versionformat = ">i"
             class lazyparser(object):
                 """
                 this class avoids the need to parse the entirety of large indices

                 It keeps the open index file and fills two structures on demand:
                 self.index, a list with one slot per entry holding either None
                 (not read yet) or the raw packed entry, and self.map, a dict
                 from nodeid to revision number.
                 """
                 # lazyparser is not safe to use on windows if win32 extensions not
                 # available. it keeps file handle open, which make it not possible
                 # to break hardlinks on local cloned repos.
                 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                                   hasattr(util, 'win32api'))
                 def __init__(self, dataf, size, indexformat, shaoffset):
                     """
                     dataf is the open index file, size its length in bytes,
                     indexformat the struct format of one entry and shaoffset the
                     byte offset of the nodeid inside an entry.
                     """
                     self.dataf = dataf
                     self.format = indexformat
                     # size in bytes of one packed entry
                     self.s = struct.calcsize(indexformat)
                     self.indexformat = indexformat
                     self.datasize = size
                     # number of entries in the file
                     self.l = size/self.s
                     self.index = [None] * self.l
                     self.map = {nullid: -1}
                     # set once loadmap has filled self.map for the whole file
                     self.allmap = 0
                     # set once every entry of self.index has been read
                     self.all = 0
                     # number of backwards searches done by findnode so far
                     self.mapfind_count = 0
                     self.shaoffset = shaoffset
                 def loadmap(self):
                     """
                     during a commit, we need to make sure the rev being added is
                     not a duplicate.  This requires loading the entire index,
                     which is fairly slow.  loadmap can load up just the node map,
                     which takes much less time.
                     """
                     if self.allmap: return
                     start = 0
                     end = self.datasize
                     self.allmap = 1
                     cur = 0
                     count = 0
                     # read the file front to back, 256 entries at a time
                     blocksize = self.s * 256
                     self.dataf.seek(0)
                     while cur < end:
                         data = self.dataf.read(blocksize)
                         off = 0
                         for x in xrange(256):
                             # only the 20-byte nodeid of each entry is looked at
                             n = data[off + self.shaoffset:off + self.shaoffset + 20]
                             self.map[n] = count
                             count += 1
                             if count >= self.l:
                                 break
                             off += self.s
                         cur += blocksize
                 def loadblock(self, blockstart, blocksize, data=None):
                     """
                     store the raw entries found in one block of the index file

                     blockstart and blocksize are in bytes.  The block is read
                     from the file unless its contents are passed in as data.
                     Slots of self.index that are already filled are left alone.
                     """
                     if self.all: return
                     if data is None:
                         self.dataf.seek(blockstart)
                         data = self.dataf.read(blocksize)
                     # number of whole entries actually present in data
                     lend = len(data) / self.s
                     # revision number of the first entry in the block
                     i = blockstart / self.s
                     off = 0
                     for x in xrange(lend):
                         if self.index[i + x] == None:
                             b = data[off : off + self.s]
                             self.index[i + x] = b
                             n = b[self.shaoffset:self.shaoffset + 20]
                             self.map[n] = i + x
                         off += self.s
                 def findnode(self, node):
                     """search backwards through the index file for a specific node"""
                     # the whole map is loaded already, a search cannot add anything
                     if self.allmap: return None
                     # hg log will cause many many searches for the manifest
                     # nodes.  After we get called a few times, just load the whole
                     # thing.
                     if self.mapfind_count > 8:
                         self.loadmap()
                         if node in self.map:
                             return node
                         return None
                     self.mapfind_count += 1
                     # skip the entries at the end of the index that are loaded
                     # already; loadblock has put their nodes into self.map
                     last = self.l - 1
                     while self.index[last] != None:
                         if last == 0:
                             # every entry is loaded, so both structures are complete
                             self.all = 1
                             self.allmap = 1
                             return None
                         last -= 1
                     end = (last + 1) * self.s
                     blocksize = self.s * 256
                     while end >= 0:
                         start = max(end - blocksize, 0)
                         self.dataf.seek(start)
                         data = self.dataf.read(end - start)
                         findend = end - start
                         while True:
                             # we're searching backwards, so we have to make sure
                             # we don't find a changeset where this node is a parent
                             off = data.rfind(node, 0, findend)
                             findend = off
                             if off >= 0:
                                 # round the hit down to the start of its entry and
                                 # compare against the nodeid field of that entry
                                 i = off / self.s
                                 off = i * self.s
                                 n = data[off + self.shaoffset:off + self.shaoffset + 20]
                                 if n == node:
                                     # i counts entries within this block only
                                     self.map[n] = i + start / self.s
                                     return node
                             else:
                                 break
                         end -= blocksize
                     return None
                 def loadindex(self, i=None, end=None):
                     """
                     read raw entries into self.index

                     Without arguments the whole file is read.  With i and end,
                     entries i up to (not including) end are read as one block.
                     With only i, a block of 64 entries containing entry i is read.
                     """
                     if self.all: return
                     all = False
                     if i == None:
                         blockstart = 0
                         # as many whole entries as fit into 512 bytes
                         blocksize = (512 / self.s) * self.s
                         end = self.datasize
                         all = True
                     else:
                         if end:
                             blockstart = i * self.s
                             end = end * self.s
                             blocksize = end - blockstart
                         else:
                             # clearing bit 5 makes the block start at i or i - 32
                             blockstart = (i & ~(32)) * self.s
                             blocksize = self.s * 64
                             end = blockstart + blocksize
                     while blockstart < end:
                         self.loadblock(blockstart, blocksize)
                         blockstart += blocksize
                     if all: self.all = True
             class lazyindex(object):
                 """list-like front end to the parser's index, loading on demand"""

                 def __init__(self, parser):
                     self.p = parser

                 def __len__(self):
                     return len(self.p.index)

                 def load(self, pos):
                     """have the parser read the entry at pos and return it"""
                     # the parser wants an absolute position
                     if pos < 0:
                         pos = pos + len(self.p.index)
                     self.p.loadindex(pos)
                     return self.p.index[pos]

                 def __getitem__(self, pos):
                     entry = self.p.index[pos]
                     if not entry:
                         entry = self.load(pos)
                     # entries are kept as packed strings until first use
                     if isinstance(entry, str):
                         entry = struct.unpack(self.p.indexformat, entry)
                     return entry

                 def __setitem__(self, pos, item):
                     self.p.index[pos] = item

                 def __delitem__(self, pos):
                     del self.p.index[pos]

                 def append(self, e):
                     self.p.index.append(e)
             class lazymap(object):
                 """dict-like node map that falls back to the parser on a miss"""

                 def __init__(self, parser):
                     self.p = parser

                 def load(self, key):
                     """search the index for key, raising KeyError if it is absent"""
                     if self.p.findnode(key) is None:
                         raise KeyError(key)

                 def __contains__(self, key):
                     # only pay for loading the full map when the quick check fails
                     if key not in self.p.map:
                         self.p.loadmap()
                     return key in self.p.map

                 def __iter__(self):
                     yield nullid
                     for rev in xrange(self.p.l):
                         entry = self.p.index[rev]
                         if not entry:
                             self.p.loadindex(rev)
                             entry = self.p.index[rev]
                         if isinstance(entry, str):
                             entry = struct.unpack(self.p.indexformat, entry)
                         # the nodeid is the last field of an entry
                         yield entry[-1]

                 def __getitem__(self, key):
                     known = self.p.map
                     if key in known:
                         return known[key]
                     try:
                         self.load(key)
                         return self.p.map[key]
                     except KeyError:
                         raise KeyError("node " + hex(key))

                 def __setitem__(self, key, val):
                     self.p.map[key] = val

                 def __delitem__(self, key):
                     del self.p.map[key]
             class RevlogError(Exception):
                 """raised for revlog problems such as an invalid index or bad chunk"""
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, includings its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 """
                 def __init__(self, opener, indexfile, datafile,
                              defversion=REVLOG_DEFAULT_VERSION):
                     """
                     set up a revlog stored in indexfile and datafile

                     opener is the callable used for every file open, which lets
                     callers implement COW semantics or the like.  defversion is
                     the version word load() falls back to when the index is
                     missing or empty.  The index is read immediately.
                     """
                     self.opener = opener
                     self.indexfile = indexfile
                     self.datafile = datafile
                     self.defversion = defversion
                     # no stat information and nothing cached yet
                     self.indexstat = None
                     self.chunkcache = None
                     self.cache = None
                     self.load()
                 def load(self):
                     v = self.defversion
                     try:
                         f = self.opener(self.indexfile)
                         i = f.read(4)
                         f.seek(0)
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         i = ""
                     else:
                         try:
                             st = util.fstat(f)
                         except AttributeError, inst:
                             st = None
                         else:
                             oldst = self.indexstat
                             if (oldst and st.st_dev == oldst.st_dev
                                 and st.st_ino == oldst.st_ino
                                 and st.st_mtime == oldst.st_mtime
                                 and st.st_ctime == oldst.st_ctime):
                                 return
                             self.indexstat = st
                         if len(i) > 0:
                             v = struct.unpack(versionformat, i)[0]
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == REVLOGV0:
                         if flags:
                             raise RevlogError(_("index %s invalid flags %x for format v0" %
                                                (self.indexfile, flags)))
                     elif fmt == REVLOGNG:
                         if flags & ~REVLOGNGINLINEDATA:
                             raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                                (self.indexfile, flags)))
                     else:
                         raise RevlogError(_("index %s invalid format %d" %
                                            (self.indexfile, fmt)))
                     self.version = v
                     if v == REVLOGV0:
                         self.indexformat = indexformatv0
                         shaoffset = v0shaoffset
                     else:
                         self.indexformat = indexformatng
                         shaoffset = ngshaoffset
                     if i:
                         if (lazyparser.safe_to_use and not self.inlinedata() and
                             st and st.st_size > 10000):
                             # big index, let's parse it on demand
                             parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                             self.index = lazyindex(parser)
                             self.nodemap = lazymap(parser)
                         else:
                             self.parseindex(f, st)
                         if self.version != REVLOGV0:
                             e = list(self.index[0])
                             type = self.ngtype(e[0])
                             e[0] = self.offset_type(0, type)
                             self.index[0] = e
                     else:
                         self.nodemap = { nullid: -1}
                         self.index = []
                 def parseindex(self, fp, st):
                     """Parse the whole index from fp into self.index and self.nodemap.

                     fp - an open file-like object to read the index from
                     st - stat result for the index, or a false value; when false the
                          file is fetched with a single unbounded read() call

                     Every fixed-size entry is unpacked with self.indexformat and
                     appended to self.index; the last field of the entry becomes the
                     key that maps to the entry's position in self.nodemap.
                     """
                     # size in bytes of one packed index entry
                     s = struct.calcsize(self.indexformat)
                     self.index = []
                     self.nodemap =  {nullid: -1}
                     inline = self.inlinedata()
                     # number of entries parsed so far (also the next revision number)
                     n = 0
                     # bytes of a partial entry carried over to the next read
                     leftover = None
                     while True:
                         if st:
                             data = fp.read(65536)
                         else:
                             # hack for httprangereader, it doesn't do partial reads well
                             data = fp.read()
                         if not data:
                             break
                         if n == 0 and self.inlinedata():
                             # cache the first chunk
                             self.chunkcache = (0, data)
                         if leftover:
                             data = leftover + data
                             leftover = None
                         off = 0
                         l = len(data)
                         while off < l:
                             if l - off < s:
                                 # not enough bytes for a full entry: keep them for
                                 # the next chunk
                                 leftover = data[off:]
                                 break
                             cur = data[off:off + s]
                             off += s
                             e = struct.unpack(self.indexformat, cur)
                             self.index.append(e)
                             self.nodemap[e[-1]] = n
                             n += 1
                             if inline:
                                 # skip e[1] bytes that follow the entry when the
                                 # revlog is inline
                                 off += e[1]
                                 if off > l:
                                     # some things don't seek well, just read it
                                     fp.read(off - l)
                         if not st:
                             # everything was read in one go above
                             break
                 def ngoffset(self, q):
                     """Return the offset stored in the high bits of a packed field.

                     The low 16 bits of q must be zero; otherwise RevlogError is
                     raised.  The remaining bits, shifted down, are returned as a long.
                     """
                     lowbits = q & 0xFFFF
                     if not lowbits:
                         return long(q >> 16)
                     raise RevlogError(_('%s: incompatible revision flag %x') %
                                       (self.indexfile, q))
                 def ngtype(self, q):
                     """Return the low 16 bits of a packed offset/type field as an int."""
                     lowbits = q & 0xFFFF
                     return int(lowbits)
                 def offset_type(self, offset, type):
                     """Pack offset (high bits) and type (low 16 bits) into one long."""
                     shifted = long(offset) << 16
                     return long(shifted | type)
                 def loadindex(self, start, end):
                     """Ask the lazy parser to load index entries start..end in one go.

                     Does nothing when the index is not a lazyindex.
                     """
                     idx = self.index
                     if not isinstance(idx, lazyindex):
                         return
                     idx.p.loadindex(start, end)
                 def loadindexmap(self):
                     """Load the full index through the lazy parser and adopt its map.

                     Does nothing when the index is not a lazyindex.
                     """
                     idx = self.index
                     if not isinstance(idx, lazyindex):
                         return
                     parser = idx.p
                     parser.loadindex()
                     self.nodemap = parser.map
                 def loadmap(self):
                     """Replace a lazy nodemap with the parser's fully loaded map.

                     Does nothing when the nodemap is not a lazymap.
                     """
                     lazy = self.nodemap
                     if not isinstance(lazy, lazymap):
                         return
                     lazy.p.loadmap()
                     self.nodemap = lazy.p.map
                 def inlinedata(self):
                     """Return the inline-data flag bits of self.version (0 when unset)."""
                     flagbits = self.version & REVLOGNGINLINEDATA
                     return flagbits
                 def tip(self):
                     """Return the node of the last revision in the index."""
                     lastrev = len(self.index) - 1
                     return self.node(lastrev)
                 def count(self):
                     """Return the number of entries in the index."""
                     entries = self.index
                     return len(entries)
                 def node(self, rev):
                     """Return the node id stored last in the index entry for rev.

                     Negative revision numbers map to nullid.
                     """
                     if rev < 0:
                         return nullid
                     return self.index[rev][-1]
                 def rev(self, node):
                     """Return the revision number recorded for node in the nodemap.

                     Raises RevlogError when the node is not present.
                     """
                     try:
                         found = self.nodemap[node]
                     except KeyError:
                         raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
                     return found
-                def linkrev(self, node): return self.index[self.rev(node)][-4]
+                def linkrev(self, node):
+                    return (node == nullid) and -1 or self.index[self.rev(node)][-4]
                 def parents(self, node):
                     """Return the two parent nodes of node.

                     nullid yields (nullid, nullid).  For a v0 revlog the two parent
                     fields of the index entry are returned as stored; otherwise they
                     are passed through self.node() and returned as a list.
                     """
                     if node == nullid:
                         return (nullid, nullid)
                     r = self.rev(node)
                     stored = self.index[r][-3:-1]
                     if self.version != REVLOGV0:
                         return [self.node(prev) for prev in stored]
                     return stored
                 def parentrevs(self, rev):
                     """Return the two parent revision numbers of rev.

                     Revision -1 yields (-1, -1).  For a v0 revlog the stored parent
                     fields are passed through self.rev(); otherwise they are returned
                     as stored.
                     """
                     if rev == -1:
                         return (-1, -1)
                     stored = self.index[rev][-3:-1]
                     if self.version != REVLOGV0:
                         return stored
                     return [self.rev(pnode) for pnode in stored]
                 def start(self, rev):
                     """Return the data offset recorded for rev, or -1 when rev < 0.

                     For non-v0 revlogs the first index field is decoded with
                     ngoffset(); for v0 it is returned unchanged.
                     """
                     if rev < 0:
                         return -1
                     if self.version == REVLOGV0:
                         return self.index[rev][0]
                     return self.ngoffset(self.index[rev][0])
                 def end(self, rev):
                     """Return start(rev) plus length(rev)."""
                     begin = self.start(rev)
                     return begin + self.length(rev)
                 def size(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     if self.version != REVLOGV0:
                         # non-v0 entries carry a length field; a negative value means
                         # it has to be computed from the text
                         stored = self.index[rev][2]
                         if stored >= 0:
                             return stored
                     return len(self.revision(self.node(rev)))

                     # alternate implementation, The advantage to this code is it
                     # will be faster for a single revision.  But, the results are not
                     # cached, so finding the size of every revision will be slower.
                     #
                     # if self.cache and self.cache[1] == rev:
                     #     return len(self.cache[2])
                     # base = self.base(rev)
                     # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                     #     base = self.cache[1]
                     #     text = self.cache[2]
                     # else:
                     #     text = self.revision(self.node(base))
                     # l = len(text)
                     # for x in xrange(base + 1, rev + 1):
                     #     l = mdiff.patchedsize(l, self.chunk(x))
                     # return l
                 def length(self, rev):
                     """Return the second index field for rev, or 0 when rev < 0."""
                     if rev < 0:
                         return 0
                     entry = self.index[rev]
                     return entry[1]
                 def base(self, rev):
                     """Return the base field of rev's index entry.

                     A negative rev is returned unchanged.
                     """
                     if rev < 0:
                         return rev
                     return self.index[rev][-5]
                 def reachable(self, rev, stop=None):
                     """Return a dict whose keys are the nodes reachable from rev.

                     rev  - the node to start walking parents from (compared against
                            nodes, despite its name)
                     stop - optional node; it is not expanded further, and parents
                            with a lower revision number than it are ignored
                     """
                     seen = {rev: 1}
                     pending = [rev]
                     stopn = 0
                     if stop:
                         stopn = self.rev(stop)
                     while pending:
                         cur = pending.pop(0)
                         if cur == stop or cur == nullid:
                             continue
                         for parent in self.parents(cur):
                             if self.rev(parent) >= stopn and parent not in seen:
                                 seen[parent] = 1
                                 pending.append(parent)
                     return seen
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a tuple containing three elements. Elements 1 and 2 contain
                     the final lists of bases and heads after all the unreachable ones
                     have been pruned.  Element 0 contains a topologically sorted list of
                     all nodes that satisfy these constraints:
                     1. All nodes must be descended from a node in roots (the nodes on
                        roots are considered descended from themselves).
                     2. All nodes must also be ancestors of a node in heads (the nodes in
                        heads are considered to be their own ancestors).
                     If roots is unspecified, nullid is assumed as the only root.
                     If heads is unspecified, it is taken to be the output of the
                     heads method (i.e. a list of all nodes in the repository that
                     have no children)."""
                     # Result returned whenever the constraints select nothing.
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendent of nullid
                         lowestrev = -1
                     if (lowestrev == -1) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in xrange(0, self.count())],
                                 [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = self.count() - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = {}
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = heads[:]
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, 0)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendent of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors[n] = 1 # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.extend([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > -1:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [n for n in roots if n in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(n) for n in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = -1
                             roots = [nullid]
                     # Transform our roots list into a 'set' (i.e. a dictionary where the
                     # values don't matter).
                     descendents = dict.fromkeys(roots, 1)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendents.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendents, empty parents will look like
                     # they're descendents.
                     for r in xrange(max(lowestrev, 0), highestrev + 1):
                         n = self.node(r)
                         isdescendent = False
                         if lowestrev == -1:  # Everybody is a descendent of nullid
                             isdescendent = True
                         elif n in descendents:
                             # n is already a descendent
                             isdescendent = True
                             # This check only needs to be done here because all the roots
                             # will start being marked as descendents before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendents, it's not a root.
                                 if (p[0] in descendents) or (p[1] in descendents):
                                     roots.pop(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendent if either of its parents are
                             # descendents.  (We seeded the descendents list with the roots
                             # up there, remember?)
                             if (p[0] in descendents) or (p[1] in descendents):
                                 descendents[n] = 1
                                 isdescendent = True
                         if isdescendent and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendents and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = 1
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = 1
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     # Keep only the heads that were actually reached (value set to 1).
                     heads = [n for n in heads.iterkeys() if heads[n] != 0]
                     roots = roots.keys()
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def heads(self, start=None):
                     """Return the nodes that have no children.

                     start - optional node; when given, only heads descended from it
                             are returned.  Defaults to nullid.
                     """
                     if start is None:
                         start = nullid
                     firstrev = self.rev(start)
                     # both dicts are used as sets of revision numbers
                     descended = {firstrev: 1}
                     tips = {firstrev: 1}
                     getparents = self.parentrevs
                     for r in xrange(firstrev + 1, self.count()):
                         for p in getparents(r):
                             if p in descended:
                                 descended[r] = 1
                                 tips[r] = 1
                             # a revision with a child is no longer a head
                             tips.pop(p, None)
                     return [self.node(r) for r in tips]
                 def children(self, node):
                     """Return the nodes that list node among their parents.

                     Only revisions after node's own are examined.  A child is listed
                     once for every parent slot that equals node.
                     """
                     found = []
                     first = self.rev(node) + 1
                     for r in range(first, self.count()):
                         candidate = self.node(r)
                         found.extend([candidate for pn in self.parents(candidate)
                                       if pn == node])
                     return found
                 def lookup(self, id):
                     """Resolve id to a node.

                     id may be a node already present in the nodemap, an integer
                     revision number, a string holding a (possibly negative) revision
                     number, or a prefix of a hex node id.  Raises RevlogError when a
                     prefix matches no node or more than one.
                     """
                     if id in self.nodemap:
                         return id
                     if type(id) == type(0):
                         return self.node(id)
                     try:
                         num = int(id)
                         # reject spellings such as "007" or " 7" that int() accepts
                         if str(num) != id:
                             raise ValueError
                         total = self.count()
                         if num < 0:
                             # negative numbers count back from the end
                             num = total + num
                         if num < 0 or num >= total:
                             raise ValueError
                         return self.node(num)
                     except (ValueError, OverflowError):
                         # not a usable revision number: try it as a hex prefix
                         matches = [n for n in self.nodemap if hex(n).startswith(id)]
                         if len(matches) > 1:
                             raise RevlogError(_("Ambiguous identifier"))
                         if len(matches) < 1:
                             raise RevlogError(_("No match found"))
                         return matches[0]
                 def diff(self, a, b):
                     """Return the delta that mdiff.textdiff computes from a to b."""
                     delta = mdiff.textdiff(a, b)
                     return delta
                 def patches(self, t, pl):
                     """Return t with the list of patches pl applied by mdiff.patches."""
                     patched = mdiff.patches(t, pl)
                     return patched
                 def chunk(self, rev, df=None, cachelen=4096):
                     """Return the decompressed stored chunk for rev.

                     rev      - revision number whose chunk is wanted
                     df       - optional open file to read from; when false the index
                                file (inline revlog) or data file is opened instead
                     cachelen - minimum number of bytes to read when refilling
                                self.chunkcache
                     """
                     start = self.start(rev)
                     length = self.length(rev)
                     inline = self.inlinedata()
                     if inline:
                         # inline offsets are shifted by the size of rev + 1 index
                         # entries
                         start += (rev + 1) * struct.calcsize(self.indexformat)
                     end = start + length

                     def loadcache(fh):
                         # refill self.chunkcache with at least this chunk
                         if not fh:
                             if inline:
                                 fh = self.opener(self.indexfile)
                             else:
                                 fh = self.opener(self.datafile)
                         fh.seek(start)
                         wanted = max(cachelen, length) # 4k
                         self.chunkcache = (start, fh.read(wanted))

                     if not self.chunkcache:
                         loadcache(df)
                     cachestart = self.chunkcache[0]
                     cacheend = cachestart + len(self.chunkcache[1])
                     if cachestart <= start and end <= cacheend:
                         # it is cached
                         offset = start - cachestart
                     else:
                         loadcache(df)
                         offset = 0
                     #def checkchunk():
                     #    df = self.opener(self.datafile)
                     #    df.seek(start)
                     #    return df.read(length)
                     #assert s == checkchunk()
                     raw = self.chunkcache[1][offset:offset + length]
                     return decompress(raw)
                 def delta(self, node):
                     """Return the delta between node's revision and the one before it."""
                     cur = self.rev(node)
                     prev = cur - 1
                     return self.revdiff(prev, cur)
                 def revdiff(self, rev1, rev2):
                     """Return a delta from revision rev1 to revision rev2.

                     When both revisions share a base and rev2 directly follows rev1,
                     the stored chunk for rev2 is returned; otherwise both texts are
                     rebuilt and diffed.
                     """
                     samebase = self.base(rev1) == self.base(rev2)
                     if samebase and rev1 + 1 == rev2:
                         return self.chunk(rev2)
                     old = self.revision(self.node(rev1))
                     new = self.revision(self.node(rev2))
                     return self.diff(old, new)
                 def revision(self, node):
                     """return the uncompressed text of the revision with the given node

                     The text is rebuilt by taking a starting text (the chunk at the
                     base revision, or the cached text when it lies on the same delta
                     chain) and applying the chunks of the following revisions.
                     Raises RevlogError when the rebuilt text does not hash to node.
                     """
                     if node == nullid: return ""
                     # self.cache is read as (node, rev, text)
                     if self.cache and self.cache[0] == node: return self.cache[2]
                     # look up what we need to read
                     text = None
                     rev = self.rev(node)
                     base = self.base(rev)
                     if self.inlinedata():
                         # we probably have the whole chunk cached
                         df = None
                     else:
                         df = self.opener(self.datafile)
                     # do we have useful data cached?
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         # start from the cached text instead of the chain base
                         base = self.cache[1]
                         text = self.cache[2]
                         self.loadindex(base, rev + 1)
                     else:
                         self.loadindex(base, rev + 1)
                         text = self.chunk(base, df=df)
                     # collect the deltas between the starting text and rev
                     bins = []
                     for r in xrange(base + 1, rev + 1):
                         bins.append(self.chunk(r, df=df))
                     text = self.patches(text, bins)
                     # verify the result against the node id before caching it
                     p1, p2 = self.parents(node)
                     if node != hash(text, p1, p2):
                         raise RevlogError(_("integrity check failed on %s:%d")
                                       % (self.datafile, rev))
                     self.cache = (node, rev, text)
                     return text
                 def checkinlinesize(self, tr, fp=None):
                     """Split an inline revlog into index and data files once it is big.

                     tr - the transaction object; it must already know self.indexfile
                     fp - optional open handle on the index file whose current
                          position is taken as the file size; when omitted the index
                          is opened and positioned at its end

                     Does nothing unless the revlog is inline and the measured size is
                     at least 131072 bytes.  Raises RevlogError when the index file is
                     not part of the transaction.
                     """
                     if not self.inlinedata():
                         return
                     if not fp:
                         fp = self.opener(self.indexfile, 'r')
                         fp.seek(0, 2)
                     size = fp.tell()
                     if size < 131072:
                         return
                     trinfo = tr.find(self.indexfile)
                     if trinfo is None:
                         # interpolate after the gettext call so the lookup sees the
                         # unformatted message, as the other errors in this class do
                         raise RevlogError(_("%s not found in the transaction") %
                                           self.indexfile)
                     trindex = trinfo[2]
                     dataoff = self.start(trindex)
                     tr.add(self.datafile, dataoff)
                     # copy every revision's chunk out of the inline file
                     df = self.opener(self.datafile, 'w')
                     calc = struct.calcsize(self.indexformat)
                     for r in xrange(self.count()):
                         # inline chunks sit after r + 1 index entries
                         start = self.start(r) + (r + 1) * calc
                         length = self.length(r)
                         fp.seek(start)
                         d = fp.read(length)
                         df.write(d)
                     fp.close()
                     df.close()
                     # rewrite the index without the inline flag
                     fp = self.opener(self.indexfile, 'w', atomictemp=True)
                     self.version &= ~(REVLOGNGINLINEDATA)
                     if self.count():
                         # the packed version replaces the first 4 bytes of entry 0
                         x = self.index[0]
                         e = struct.pack(self.indexformat, *x)[4:]
                         l = struct.pack(versionformat, self.version)
                         fp.write(l)
                         fp.write(e)
                     for i in xrange(1, self.count()):
                         x = self.index[i]
                         e = struct.pack(self.indexformat, *x)
                         fp.write(e)
                     # if we don't call rename, the temp file will never replace the
                     # real index
                     fp.rename()
                     tr.replace(self.indexfile, trindex * calc)
                     # cached bytes came from the old inline layout
                     self.chunkcache = None
                 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     d - an optional precomputed delta

                     Returns the node id of the revision.  If that node is already in
                     the nodemap nothing is written.
                     """
                     if text is None: text = ""
                     if p1 is None: p1 = self.tip()
                     if p2 is None: p2 = nullid
                     node = hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     # n is the new revision number, t the current last revision
                     n = self.count()
                     t = n - 1
                     if n:
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                         if not d:
                             # no usable delta supplied: diff against the current tip
                             prev = self.revision(self.tip())
                             d = self.diff(prev, str(text))
                         # compress() result is used as a pair of strings
                         data = compress(d)
                         l = len(data[1]) + len(data[0])
                         # bytes from the start of the chain base through the new delta
                         dist = end - start + l
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if not n or dist > len(text) * 2:
                         data = compress(text)
                         l = len(data[1]) + len(data[0])
                         base = n
                     else:
                         base = self.base(t)
                     offset = 0
                     if t >= 0:
                         offset = self.end(t)
                     if self.version == REVLOGV0:
                         e = (offset, l, base, link, p1, p2, node)
                     else:
                         e = (self.offset_type(offset, 0), l, len(text),
                              base, link, self.rev(p1), self.rev(p2), node)
                     self.index.append(e)
                     self.nodemap[node] = n
                     entry = struct.pack(self.indexformat, *e)
                     if not self.inlinedata():
                         # separate data file: register both files, write the data,
                         # then reopen the index for the entry
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, n * len(entry))
                         f = self.opener(self.datafile, "a")
                         if data[0]:
                             f.write(data[0])
                         f.write(data[1])
                         f.close()
                         f = self.opener(self.indexfile, "a")
                     else:
                         f = self.opener(self.indexfile, "a+")
                         f.seek(0, 2)
                         transaction.add(self.indexfile, f.tell(), self.count() - 1)
                     if len(self.index) == 1 and self.version != REVLOGV0:
                         # first entry of a non-v0 revlog: the packed version is
                         # written in place of the entry's first 4 bytes
                         l = struct.pack(versionformat, self.version)
                         f.write(l)
                         entry = entry[4:]
                     f.write(entry)
                     if self.inlinedata():
                         # inline revlog: the data follows its index entry
                         f.write(data[0])
                         f.write(data[1])
                         self.checkinlinesize(transaction, f)
                     self.cache = (node, n, text)
                     return node
                 def ancestor(self, a, b):
                     """calculate the least common ancestor of nodes a and b

                     Returns a node.  When one node is found among the ancestors of
                     the other it is returned directly; otherwise the ancestors of both
                     are walked in order of decreasing distance from the root until a
                     common node turns up.
                     """
                     # start with some short cuts for the linear cases
                     if a == b:
                         return a
                     ra = self.rev(a)
                     rb = self.rev(b)
                     if ra < rb:
                         last = b
                         first = a
                     else:
                         last = a
                         first = b
                     # reachable won't include stop in the list, so we have to use a parent
                     reachable = self.reachable(last, stop=self.parents(first)[0])
                     if first in reachable:
                         return first
                     # calculate the distance of every node from root
                     dist = {nullid: 0}
                     for i in xrange(self.count()):
                         n = self.node(i)
                         p1, p2 = self.parents(n)
                         dist[n] = max(dist[p1], dist[p2]) + 1
                     # traverse ancestors in order of decreasing distance from root
                     def ancestors(node):
                         # we store negative distances because heap returns smallest member
                         h = [(-dist[node], node)]
                         seen = {}
                         while h:
                             d, n = heapq.heappop(h)
                             if n not in seen:
                                 seen[n] = 1
                                 yield (-d, n)
                                 for p in self.parents(n):
                                     heapq.heappush(h, (-dist[p], p))
                     # group the ancestors of node by distance, yielding
                     # (distance, {node: 1, ...}) pairs
                     def generations(node):
                         sg, s = None, {}
                         for g,n in ancestors(node):
                             if g != sg:
                                 if sg:
                                     yield sg, s
                                 sg, s = g, {n:1}
                             else:
                                 s[n] = 1
                         yield sg, s
                     x = generations(a)
                     y = generations(b)
                     gx = x.next()
                     gy = y.next()
                     # increment each ancestor list until it is closer to root than
                     # the other, or they match
                     while 1:
                         #print "ancestor gen %s %s" % (gx[0], gy[0])
                         if gx[0] == gy[0]:
                             # find the intersection
                             i = [ n for n in gx[1] if n in gy[1] ]
                             if i:
                                 return i[0]
                             else:
                                 #print "next"
                                 gy = y.next()
                                 gx = x.next()
                         elif gx[0] < gy[0]:
                             #print "next y"
                             gy = y.next()
                         else:
                             #print "next x"
                             gx = x.next()
                 def group(self, nodelist, lookup, infocollect=None):
                     """build the chunk stream for a delta group

                     nodelist is the ordered list of nodes to send. the first
                     delta emitted is first-parent(nodelist[0]) -> nodelist[0],
                     every following delta is against the node listed just
                     before it. the receiver is expected to already have that
                     first parent.

                     lookup(node) returns the string appended to each chunk
                     header after the node and its two parents.
                     infocollect, when not None, is called with every node
                     right before its delta is computed.

                     this is a generator: it yields one changegroup.genchunk()
                     per node and finishes with changegroup.closechunk().
                     """
                     revs = [self.rev(n) for n in nodelist]
                     if revs:
                         # the chain starts at the first parent of the first rev
                         firstparent = self.parents(self.node(revs[0]))[0]
                         revs.insert(0, self.rev(firstparent))
                         # each rev is sent as a delta against its predecessor
                         for prevrev, currev in zip(revs[:-1], revs[1:]):
                             curnode = self.node(currev)
                             if infocollect is not None:
                                 infocollect(curnode)
                             delta = self.revdiff(prevrev, currev)
                             parents = self.parents(curnode)
                             header = (curnode + parents[0] + parents[1]
                                       + lookup(curnode))
                             yield changegroup.genchunk("%s%s" % (header, delta))
                     # an empty nodelist still produces the terminating chunk
                     yield changegroup.closechunk()
                 def addgroup(self, revs, linkmapper, transaction, unique=0):
                     """
                     add a delta group

                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.

                     revs is an iterable of chunks; each chunk starts with an
                     80 byte header (node, p1, p2 and changeset node, 20 bytes
                     each) followed by the delta itself.
                     linkmapper maps the changeset node from the header to the
                     link value stored with the revision.
                     transaction is told the current end of the index file (and
                     of the data file when the revlog is not inline) before
                     anything is appended.
                     unique is accepted but not used by the active code (its
                     only use is in the commented-out check below).

                     returns the node of the last chunk read, whether or not it
                     was actually added (None if revs yielded nothing).
                     raises RevlogError on an unknown parent or base, or when
                     addrevision returns a node different from the one given in
                     the chunk header.
                     """
                     #track the base of the current delta log
                     # r: revision number the next added entry will get
                     # t: revision number of the current tip (-1 when empty)
                     r = self.count()
                     t = r - 1
                     node = None
                     # prev is the node we appended last; -1 never equals a node,
                     # so the first chunk always takes the "full revision" path
                     base = prev = -1
                     start = end = textlen = 0
                     if r:
                         end = self.end(t)
                     ifh = self.opener(self.indexfile, "a+")
                     # position at end of file so tell() reports the current size
                     ifh.seek(0, 2)
                     transaction.add(self.indexfile, ifh.tell(), self.count())
                     if self.inlinedata():
                         # inline revlogs keep the data in the index file
                         dfh = None
                     else:
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a")
                     # loop through our set of deltas
                     # chain is the node the current delta applies to
                     chain = None
                     for chunk in revs:
                         # header: node, first parent, second parent, changeset node
                         node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                         link = linkmapper(cs)
                         if node in self.nodemap:
                             # this can happen if two branches make the same change
                             # if unique:
                             #    raise RevlogError(_("already have %s") % hex(node[:4]))
                             # skip it, but the next delta is still against this node
                             chain = node
                             continue
                         delta = chunk[80:]
                         for p in (p1, p2):
                             if not p in self.nodemap:
                                 raise RevlogError(_("unknown parent %s") % short(p))
                         if not chain:
                             # retrieve the parent revision of the delta chain
                             chain = p1
                             # NOTE(review): p1 was already checked against nodemap
                             # just above, so this looks unreachable; it also passes
                             # only the first 4 bytes of the node to short() - confirm
                             if not chain in self.nodemap:
                                 raise RevlogError(_("unknown base %s") % short(chain[:4]))
                         # full versions are inserted when the needed deltas become
                         # comparable to the uncompressed text or when the previous
                         # version is not the one we have a delta against. We use
                         # the size of the previous full rev as a proxy for the
                         # current size.
                         if chain == prev:
                             # the delta applies to what we stored last, so it can be
                             # appended as is; compress() returns a pair whose two
                             # parts are concatenated to form the stored bytes
                             tempd = compress(delta)
                             cdelta = tempd[0] + tempd[1]
                             textlen = mdiff.patchedsize(textlen, delta)
                         # cdelta is only defined when chain == prev; the "or" short
                         # circuits, so it is never read on the other path
                         if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                             # flush our writes here so we can read it in revision
                             if dfh:
                                 dfh.flush()
                             ifh.flush()
                             # rebuild the full text and store it through addrevision
                             text = self.revision(chain)
                             text = self.patches(text, [delta])
                             chk = self.addrevision(text, transaction, link, p1, p2)
                             # the node returned for the rebuilt text must match the
                             # node announced in the chunk header
                             if chk != node:
                                 raise RevlogError(_("consistency error adding group"))
                             textlen = len(text)
                         else:
                             # append the compressed delta directly; the index entry
                             # layout depends on the revlog version
                             if self.version == REVLOGV0:
                                 e = (end, len(cdelta), base, link, p1, p2, node)
                             else:
                                 e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                                      link, self.rev(p1), self.rev(p2), node)
                             self.index.append(e)
                             self.nodemap[node] = r
                             if self.inlinedata():
                                 # inline: index entry and data go to the same file
                                 ifh.write(struct.pack(self.indexformat, *e))
                                 ifh.write(cdelta)
                                 self.checkinlinesize(transaction, ifh)
                                 # checkinlinesize may have switched the revlog to a
                                 # separate data file; reopen both handles if so
                                 if not self.inlinedata():
                                     dfh = self.opener(self.datafile, "a")
                                     ifh = self.opener(self.indexfile, "a")
                             else:
                                 if not dfh:
                                     # addrevision switched from inline to conventional
                                     # reopen the index
                                     dfh = self.opener(self.datafile, "a")
                                     ifh = self.opener(self.indexfile, "a")
                                 dfh.write(cdelta)
                                 ifh.write(struct.pack(self.indexformat, *e))
                         # the revision just added becomes the tip and the base for
                         # the next delta; refresh the chain bookkeeping from it
                         t, r, chain, prev = r, r + 1, node, node
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                     return node
                 def strip(self, rev, minlink):
                     """truncate the revlog so that rev and everything after it is gone

                     rev is the first revision considered for removal. it is
                     moved forward past every revision whose link field (fourth
                     from the end of the index entry) is lower than minlink, so
                     that nothing linked to an older changeset is removed.

                     nothing happens when the revlog is empty, when rev is past
                     the end, or when no revision from rev on has a link of at
                     least minlink. otherwise the files on disk are truncated
                     and the in-memory index, nodemap and caches are reset.
                     """
                     if self.count() == 0:
                         return
                     if rev >= self.count():
                         return
                     # the index is indexed and sliced below, so make sure a
                     # lazy index is fully loaded first
                     if isinstance(self.index, lazyindex):
                         self.loadindexmap()
                     # When stripping away a revision, we need to make sure it
                     # does not actually belong to an older changeset.
                     # The minlink parameter defines the oldest revision
                     # we're allowed to strip away.
                     while self.index[rev][-4] < minlink:
                         rev += 1
                         if rev >= self.count():
                             return
                     # cut the files on disk first
                     dataend = self.start(rev)
                     if self.inlinedata():
                         # index entries and data share one file
                         indexend = dataend + rev * struct.calcsize(self.indexformat)
                     else:
                         datafh = self.opener(self.datafile, "a")
                         datafh.truncate(dataend)
                         indexend = rev * struct.calcsize(self.indexformat)
                     indexfh = self.opener(self.indexfile, "a")
                     indexfh.truncate(indexend)
                     # now forget the stripped revisions in memory as well
                     self.cache = None
                     self.chunkcache = None
                     for stale in xrange(rev, self.count()):
                         del self.nodemap[self.node(stale)]
                     del self.index[rev:]
                 def checksize(self):
                     expected = 0
                     if self.count():
                         expected = self.end(self.count() - 1)
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         dd = actual - expected
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         s = struct.calcsize(self.indexformat)
                         i = actual / s
                         di = actual - (i * s)
                         if self.inlinedata():
                             databytes = 0
                             for r in xrange(self.count()):
                                 databytes += self.length(r)
                             dd = 0
                             di = actual - self.count() * s - databytes
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)