Add revlog.parentrevs function....
Alexis S. L. Carvalho - r2489:568e58ee default
@@ -1,1272 +1,1279 @@
"""
revlog.py - storage back-end for mercurial

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches

Copyright 2005 Matt Mackall <mpm@selenic.com>

This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
"""

from node import *
from i18n import gettext as _
from demandload import demandload
demandload(globals(), "binascii changegroup errno heapq mdiff os")
demandload(globals(), "sha struct util zlib")

# revlog version strings
REVLOGV0 = 0
REVLOGNG = 1

# revlog flags
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

def flagstr(flag):
    if flag == "inline":
        return REVLOGNGINLINEDATA
    raise RevlogError(_("unknown revlog flag %s" % flag))

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = sha.new(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()
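
# Illustrative check (added for exposition, not part of the change): hash()
# sorts the parents before feeding them to SHA-1, so the result is symmetric
# in p1 and p2.  The 20-byte parent ids below are made-up example values.
_p1, _p2 = '\x01' * 20, '\x02' * 20
assert hash('text', _p1, _p2) == hash('text', _p2, _p1)
del _p1, _p2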

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text: return ("", text)
    if len(text) < 44:
        if text[0] == '\0': return ("", text)
        return ('u', text)
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0': return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin: return bin
    t = bin[0]
    if t == '\0': return bin
    if t == 'x': return zlib.decompress(bin)
    if t == 'u': return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
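
# Illustrative round-trip (added for exposition): short or incompressible
# strings are stored untagged or with a 'u' marker, while compressible data
# becomes zlib output, whose first byte is 'x'.
assert compress("abc") == ('u', 'abc')
assert decompress("".join(compress("a" * 100))) == "a" * 100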

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56
# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">i"
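
# Quick sanity check of the layouts documented above (added for exposition):
# a v0 entry is 4*4 + 3*20 = 76 bytes with the nodeid at offset 56, and an
# "ng" entry is 8 + 6*4 + 20 + 12 = 64 bytes with the nodeid at offset 32.
assert struct.calcsize(indexformatv0) == 76 and v0shaoffset == 56
assert struct.calcsize(indexformatng) == 64 and ngshaoffset == 32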

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions are not
    # available.  it keeps a file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size, indexformat, shaoffset):
        self.dataf = dataf
        self.format = indexformat
        self.s = struct.calcsize(indexformat)
        self.indexformat = indexformat
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: -1}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0
        self.shaoffset = shaoffset

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate.  This requires loading the entire index,
        which is fairly slow.  loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap: return
        start = 0
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + self.shaoffset:off + self.shaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all: return
        if data is None:
            self.dataf.seek(blockstart)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[self.shaoffset:self.shaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap: return None

        # hg log will cause many many searches for the manifest
        # nodes.  After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.rfind(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + self.shaoffset:off + self.shaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all: return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (512 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~(32)) * self.s
                blocksize = self.s * 64
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all: self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        ret = self.p.index[pos] or self.load(pos)
        if isinstance(ret, str):
            ret = struct.unpack(self.p.indexformat, ret)
        return ret
    def __setitem__(self, pos, item):
        self.p.index[pos] = item
    def __delitem__(self, pos):
        del self.p.index[pos]
    def append(self, e):
        self.p.index.append(e)

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = struct.unpack(self.p.indexformat, ret)
            yield ret[-1]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

class RevlogError(Exception): pass

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, datafile,
                 defversion=REVLOG_DEFAULT_VERSION):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile
        self.opener = opener

        self.indexstat = None
        self.cache = None
        self.chunkcache = None
        self.defversion = defversion
        self.load()

    def load(self):
        v = self.defversion
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            i = ""
        else:
            try:
                st = util.fstat(f)
            except AttributeError, inst:
                st = None
            else:
                oldst = self.indexstat
                if (oldst and st.st_dev == oldst.st_dev
                    and st.st_ino == oldst.st_ino
                    and st.st_mtime == oldst.st_mtime
                    and st.st_ctime == oldst.st_ctime):
                    return
                self.indexstat = st
        if len(i) > 0:
            v = struct.unpack(versionformat, i)[0]
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_("index %s invalid flags %x for format v0" %
                                    (self.indexfile, flags)))
        elif fmt == REVLOGNG:
            if flags & ~REVLOGNGINLINEDATA:
                raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                    (self.indexfile, flags)))
        else:
            raise RevlogError(_("index %s invalid format %d" %
                                (self.indexfile, fmt)))
        self.version = v
        if v == REVLOGV0:
            self.indexformat = indexformatv0
            shaoffset = v0shaoffset
        else:
            self.indexformat = indexformatng
            shaoffset = ngshaoffset

        if i:
            if (lazyparser.safe_to_use and not self.inlinedata() and
                st and st.st_size > 10000):
                # big index, let's parse it on demand
                parser = lazyparser(f, st.st_size, self.indexformat, shaoffset)
                self.index = lazyindex(parser)
                self.nodemap = lazymap(parser)
            else:
                self.parseindex(f, st)
            if self.version != REVLOGV0:
                e = list(self.index[0])
                type = self.ngtype(e[0])
                e[0] = self.offset_type(0, type)
                self.index[0] = e
        else:
            self.nodemap = {nullid: -1}
            self.index = []


    def parseindex(self, fp, st):
        s = struct.calcsize(self.indexformat)
        self.index = []
        self.nodemap = {nullid: -1}
        inline = self.inlinedata()
        n = 0
        leftover = None
        while True:
            if st:
                data = fp.read(65536)
            else:
                # hack for httprangereader, it doesn't do partial reads well
                data = fp.read()
            if not data:
                break
            if n == 0 and self.inlinedata():
                # cache the first chunk
                self.chunkcache = (0, data)
            if leftover:
                data = leftover + data
                leftover = None
            off = 0
            l = len(data)
            while off < l:
                if l - off < s:
                    leftover = data[off:]
                    break
                cur = data[off:off + s]
                off += s
                e = struct.unpack(self.indexformat, cur)
                self.index.append(e)
                self.nodemap[e[-1]] = n
                n += 1
                if inline:
                    off += e[1]
                    if off > l:
                        # some things don't seek well, just read it
                        fp.read(off - l)
            if not st:
                break


    def ngoffset(self, q):
        if q & 0xFFFF:
            raise RevlogError(_('%s: incompatible revision flag %x') %
                              (self.indexfile, q))
        return long(q >> 16)

    def ngtype(self, q):
        return int(q & 0xFFFF)

    def offset_type(self, offset, type):
        return long(long(offset) << 16 | type)

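    # Illustrative round trip of the offset/type packing above (assuming an
    # existing revlog instance r): the high 48 bits of the first index field
    # carry the data offset, the low 16 bits the type flags:
    #   assert r.ngoffset(r.offset_type(1024, 0)) == 1024
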
    def loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

463 def tip(self): return self.node(len(self.index) - 1)
463 def tip(self): return self.node(len(self.index) - 1)
464 def count(self): return len(self.index)
464 def count(self): return len(self.index)
465 def node(self, rev):
465 def node(self, rev):
466 return (rev < 0) and nullid or self.index[rev][-1]
466 return (rev < 0) and nullid or self.index[rev][-1]
467 def rev(self, node):
467 def rev(self, node):
468 try:
468 try:
469 return self.nodemap[node]
469 return self.nodemap[node]
470 except KeyError:
470 except KeyError:
471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
471 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
472 def linkrev(self, node): return self.index[self.rev(node)][-4]
472 def linkrev(self, node): return self.index[self.rev(node)][-4]
473 def parents(self, node):
473 def parents(self, node):
474 if node == nullid: return (nullid, nullid)
474 if node == nullid: return (nullid, nullid)
475 r = self.rev(node)
475 r = self.rev(node)
476 d = self.index[r][-3:-1]
476 d = self.index[r][-3:-1]
477 if self.version == REVLOGV0:
477 if self.version == REVLOGV0:
478 return d
478 return d
479 return [ self.node(x) for x in d ]
479 return [ self.node(x) for x in d ]
+    def parentrevs(self, rev):
+        if rev == -1:
+            return (-1, -1)
+        d = self.index[rev][-3:-1]
+        if self.version == REVLOGV0:
+            return [ self.rev(x) for x in d ]
+        return d
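    # Usage sketch for the new method (illustrative, assuming an existing
    # revlog instance r): parentrevs() answers in revision numbers, which
    # avoids the node -> rev lookups that parents() implies:
    #   p1, p2 = r.parentrevs(r.count() - 1)
    #   assert [r.node(p) for p in (p1, p2)] == list(r.parents(r.tip()))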
    def start(self, rev):
        if rev < 0:
            return -1
        if self.version != REVLOGV0:
            return self.ngoffset(self.index[rev][0])
        return self.index[rev][0]

    def end(self, rev): return self.start(rev) + self.length(rev)

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = -1
        if self.version != REVLOGV0:
            l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation: the advantage of this code is that it
        # will be faster for a single revision.  But the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def length(self, rev):
        if rev < 0:
            return 0
        else:
            return self.index[rev][1]
    def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

    def reachable(self, rev, stop=None):
        reachable = {}
        visit = [rev]
        reachable[rev] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

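    # Illustrative sketch (assuming an existing revlog instance r): the
    # returned dictionary maps every node reachable from the argument,
    # including the argument itself:
    #   assert r.tip() in r.reachable(r.tip())
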
    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned.  Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
          1. All nodes must be descended from a node in roots (the nodes on
             roots are considered descended from themselves).
          2. All nodes must also be ancestors of a node in heads (the nodes in
             heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = -1
        if (lowestrev == -1) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads[:]
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > -1:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = -1
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == -1: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

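    # Illustrative sketch (assuming an existing revlog instance r with at
    # least one revision): called with no arguments, nodesbetween() reports
    # the whole graph, its lone base nullid, and the current heads:
    #   allnodes, bases, hds = r.nodesbetween()
    #   assert bases == [nullid] and len(allnodes) == r.count()
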
    def heads(self, start=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned

        """
        if start is None:
            start = nullid
        reachable = {start: 1}
        heads = {start: 1}
        startrev = self.rev(start)

        for r in xrange(startrev + 1, self.count()):
            n = self.node(r)
            for pn in self.parents(n):
                if pn in reachable:
                    reachable[n] = 1
                    heads[n] = 1
                if pn in heads:
                    del heads[pn]
        return heads.keys()

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            n = self.node(r)
            for pn in self.parents(n):
                if pn == node:
                    c.append(n)
                    continue
                elif pn == nullid:
                    continue
        return c

    def lookup(self, id):
        """locate a node based on revision number or subset of hex nodeid"""
        try:
            rev = int(id)
            if str(rev) != id: raise ValueError
            if rev < 0: rev = self.count() + rev
            if rev < 0 or rev >= self.count(): raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            c = []
            for n in self.nodemap:
                if hex(n).startswith(id):
                    c.append(n)
            if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
            if len(c) < 1: raise RevlogError(_("No match found"))
            return c[0]

        return None

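    # Illustrative sketch (assuming an existing revlog instance r): lookup()
    # resolves either a decimal revision number or a hex nodeid prefix, so,
    # provided the prefix is unambiguous, both forms name the same node:
    #   assert r.lookup('0') == r.lookup(hex(r.node(0))[:8])
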
    def diff(self, a, b):
        """return a delta between two revisions"""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """apply a list of patches to a string"""
        return mdiff.patches(t, pl)

    def chunk(self, rev, df=None, cachelen=4096):
        start, length = self.start(rev), self.length(rev)
        inline = self.inlinedata()
        if inline:
            start += (rev + 1) * struct.calcsize(self.indexformat)
        end = start + length
        def loadcache(df):
            cache_length = max(cachelen, length) # 4k
            if not df:
                if inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self.chunkcache = (start, df.read(cache_length))

        if not self.chunkcache:
            loadcache(df)

        cache_start = self.chunkcache[0]
        cache_end = cache_start + len(self.chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache(df)
            offset = 0

        #def checkchunk():
        #    df = self.opener(self.datafile)
        #    df.seek(start)
        #    return df.read(length)
        #assert s == checkchunk()
        return decompress(self.chunkcache[1][offset:offset + length])

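    # Illustrative note (assuming an existing revlog instance r): chunk()
    # returns the decompressed data as stored on disk, so for a delta-chain
    # base revision it equals the revision text itself:
    #   b = r.base(r.count() - 1)
    #   assert r.chunk(b) == r.revision(r.node(b))
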
    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        b1 = self.base(rev1)
        b2 = self.base(rev2)
        if b1 == b2 and rev1 + 1 == rev2:
            return self.chunk(rev2)
        else:
            return self.diff(self.revision(self.node(rev1)),
                             self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""
        if self.cache and self.cache[0] == node: return self.cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        if self.inlinedata():
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
            self.loadindex(base, rev + 1)
        else:
            self.loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        bins = []
        for r in xrange(base + 1, rev + 1):
            bins.append(self.chunk(r, df=df))

        text = self.patches(text, bins)

        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self.cache = (node, rev, text)
        return text

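    # Illustrative sketch of the reconstruction above (assuming an existing
    # revlog instance r and a valid revision number rev): a revision is its
    # chain's full-text base patched with every delta up to rev, which is
    # what keeps retrieval proportional to the text size:
    #   base = r.base(rev)
    #   text = mdiff.patches(r.chunk(base),
    #                        [r.chunk(x) for x in xrange(base + 1, rev + 1)])
    #   assert text == r.revision(r.node(rev))
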
    def checkinlinesize(self, tr, fp=None):
        if not self.inlinedata():
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction" %
                                self.indexfile))

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = struct.calcsize(self.indexformat)
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        if self.count():
            x = self.index[0]
            e = struct.pack(self.indexformat, *x)[4:]
            l = struct.pack(versionformat, self.version)
            fp.write(l)
            fp.write(e)

        for i in xrange(1, self.count()):
            x = self.index[i]
            e = struct.pack(self.indexformat, *x)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self.chunkcache = None

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        if text is None: text = ""
        if p1 is None: p1 = self.tip()
        if p2 is None: p2 = nullid

        node = hash(text, p1, p2)

        if node in self.nodemap:
            return node

        n = self.count()
        t = n - 1

        if n:
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)
            if not d:
                prev = self.revision(self.tip())
                d = self.diff(prev, str(text))
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = end - start + l

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not n or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = n
        else:
            base = self.base(t)

        offset = 0
        if t >= 0:
            offset = self.end(t)

        if self.version == REVLOGV0:
            e = (offset, l, base, link, p1, p2, node)
        else:
            e = (self.offset_type(offset, 0), l, len(text),
                 base, link, self.rev(p1), self.rev(p2), node)

        self.index.append(e)
        self.nodemap[node] = n
        entry = struct.pack(self.indexformat, *e)

        if not self.inlinedata():
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, n * len(entry))
            f = self.opener(self.datafile, "a")
            if data[0]:
                f.write(data[0])
            f.write(data[1])
            f.close()
            f = self.opener(self.indexfile, "a")
        else:
            f = self.opener(self.indexfile, "a+")
            f.seek(0, 2)
            transaction.add(self.indexfile, f.tell(), self.count() - 1)

        if len(self.index) == 1 and self.version != REVLOGV0:
            l = struct.pack(versionformat, self.version)
            f.write(l)
            entry = entry[4:]

        f.write(entry)

        if self.inlinedata():
            f.write(data[0])
            f.write(data[1])
            self.checkinlinesize(transaction, f)

        self.cache = (node, n, text)
        return node

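    # A hedged usage sketch for addrevision() (not original code: the
    # constructor arguments and the "opener"/"tr" names are assumed
    # stand-ins for a repo file opener and an open transaction):
    #
    #   log = revlog(opener, "data/foo.i", "data/foo.d")
    #   n1 = log.addrevision("line\n", tr, 0)
    #   n2 = log.addrevision("line\nmore\n", tr, 1, p1=n1)
    #   assert log.revision(n2) == "line\nmore\n"
    #
    # The heuristic above bounds delta-chain cost: once the chain since
    # the last full text plus the new delta (dist) would exceed twice
    # len(text), a full compressed snapshot is stored instead.
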
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # start with some short cuts for the linear cases
        if a == b:
            return a
        ra = self.rev(a)
        rb = self.rev(b)
        if ra < rb:
            last = b
            first = a
        else:
            last = a
            first = b

        # reachable won't include stop in the list, so we have to use a parent
        reachable = self.reachable(last, stop=self.parents(first)[0])
        if first in reachable:
            return first

        # calculate the distance of every node from root
        dist = {nullid: 0}
        for i in xrange(self.count()):
            n = self.node(i)
            p1, p2 = self.parents(n)
            dist[n] = max(dist[p1], dist[p2]) + 1

        # traverse ancestors in order of decreasing distance from root
        def ancestors(node):
            # we store negative distances because heap returns smallest member
            h = [(-dist[node], node)]
            seen = {}
            while h:
                d, n = heapq.heappop(h)
                if n not in seen:
                    seen[n] = 1
                    yield (-d, n)
                    for p in self.parents(n):
                        heapq.heappush(h, (-dist[p], p))

        def generations(node):
            sg, s = None, {}
            for g, n in ancestors(node):
                if g != sg:
                    if sg:
                        yield sg, s
                    sg, s = g, {n: 1}
                else:
                    s[n] = 1
            yield sg, s

        x = generations(a)
        y = generations(b)
        gx = x.next()
        gy = y.next()

        # increment each ancestor list until it is closer to root than
        # the other, or they match
        while 1:
            #print "ancestor gen %s %s" % (gx[0], gy[0])
            if gx[0] == gy[0]:
                # find the intersection
                i = [n for n in gx[1] if n in gy[1]]
                if i:
                    return i[0]
                else:
                    #print "next"
                    gy = y.next()
                    gx = x.next()
            elif gx[0] < gy[0]:
                #print "next y"
                gy = y.next()
            else:
                #print "next x"
                gx = x.next()

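    # A minimal sketch (an illustration, not original code) of the
    # negative-distance heap trick used by ancestors() above: pushing
    # -dist makes Python's min-heap pop the node *farthest* from root
    # first, so nodes come out grouped by decreasing generation:
    #
    #   import heapq
    #   h = []
    #   for node, d in {"a": 3, "b": 1, "c": 2}.items():
    #       heapq.heappush(h, (-d, node))
    #   while h:
    #       d, n = heapq.heappop(h)
    #       print -d, n        # prints: 3 a / 2 c / 1 b
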
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to those nodes.  The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets.  The parent used is parents(node)[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            d = self.revdiff(a, b)
            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            yield changegroup.genchunk("%s%s" % (meta, d))

        yield changegroup.closechunk()

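    # Each chunk yielded by group() is an 80-byte header followed by the
    # delta data; a sketch of taking one apart (this mirrors the
    # struct.unpack done in addgroup() below):
    #
    #   node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    #   delta = chunk[80:]
    #
    # node is the revision being shipped, p1/p2 its parent nodes, and cs
    # the changeset node returned by lookup(), used for the linkrev.
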
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log.  The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = -1
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self.inlinedata():
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise RevlogError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise RevlogError(_("unknown base %s") % short(chain))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self.addrevision(text, transaction, link, p1, p2)
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                if self.version == REVLOGV0:
                    e = (end, len(cdelta), base, link, p1, p2, node)
                else:
                    e = (self.offset_type(end, 0), len(cdelta), textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                self.index.append(e)
                self.nodemap[node] = r
                if self.inlinedata():
                    ifh.write(struct.pack(self.indexformat, *e))
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self.inlinedata():
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    if not dfh:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    dfh.write(cdelta)
                    ifh.write(struct.pack(self.indexformat, *e))

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

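    # A hedged round-trip sketch (all names here are illustrative, not
    # part of this file): chunks produced by group() on a source revlog
    # can be fed to addgroup() on a destination, with linkmapper
    # translating each changeset node into a destination linkrev:
    #
    #   def linkmapper(cs):
    #       return destchangelog.rev(cs)
    #
    #   tip = destlog.addgroup(chunkiter, linkmapper, tr)
    #
    # where chunkiter yields the unframed delta chunks and tr is an open
    # transaction on the destination repository.
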
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self.loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][-4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self.inlinedata():
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * struct.calcsize(self.indexformat)
        else:
            end += rev * struct.calcsize(self.indexformat)

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self.cache = None
        self.chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:]

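    # A worked example of the truncation arithmetic in strip(), assuming
    # a separate (non-inline) data file and an index entry size
    # s = struct.calcsize(self.indexformat) (64 bytes for RevlogNG):
    #
    #   stripping from rev = 10:
    #     the data file is truncated at self.start(10), the byte offset
    #     where rev 10's delta begins;
    #     the index file is truncated at 10 * s, i.e. just past entry 9.
    #
    # For inline revlogs, deltas and entries share the index file, so
    # the cut point becomes self.start(10) + 10 * s instead.
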
    def checksize(self):
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = struct.calcsize(self.indexformat)
            i = actual / s
            di = actual - (i * s)
            if self.inlinedata():
                databytes = 0
                for r in xrange(self.count()):
                    databytes += self.length(r)
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

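    # A hedged sketch of interpreting checksize()'s result, as a
    # consistency check (such as verify) might; "log" is an assumed
    # revlog instance:
    #
    #   dd, di = log.checksize()
    #   if dd:
    #       print "data file is %d bytes off" % dd
    #   if di:
    #       print "index file is %d bytes off" % di
    #
    # Both values are 0 when the on-disk files match the sizes implied
    # by the index; a missing file counts as 0 rather than an error.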