Fix inlined revlogs to seek to eof after opening "a+"

mason@suse.com
r2077:4d0700ae default
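The change itself is two one-line additions, one in addrevision() and one in addgroup(). A file opened in "a+" mode always writes at end-of-file, but its initial position, and therefore what tell() reports before the first write, is implementation-defined for stdio-backed file objects and can be 0. Both call sites record f.tell() via transaction.add() as the offset to truncate back to on rollback, so a spurious 0 would mark the entire inlined index for truncation. Seeking to end-of-file before calling tell() makes the recorded offset the real file size. A minimal sketch of the failure mode (the file name is hypothetical, not part of the changeset):

import os

# Create a demo file with 100 bytes of existing content.
f = open("demo.i", "wb")
f.write("x" * 100)
f.close()

f = open("demo.i", "a+")
print f.tell()        # implementation-defined: stdio may report 0 here

f.seek(0, 2)          # the fix: move to end-of-file explicitly
print f.tell()        # now reliably 100, the true size

f.close()
os.remove("demo.i")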
@@ -1,1066 +1,1068 @@
 """
 revlog.py - storage back-end for mercurial

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import gettext as _
 from demandload import demandload
 demandload(globals(), "binascii changegroup errno heapq mdiff os")
 demandload(globals(), "sha struct zlib")

 # revlog version strings
 REVLOGV0 = 0
 REVLOGNG = 1

 # revlog flags
 REVLOGNGINLINEDATA = (1 << 16)

 def flagstr(flag):
     if flag == "inline":
         return REVLOGNGINLINEDATA
     raise RevlogError(_("unknown revlog flag %s" % flag))

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text: return ("", text)
     if len(text) < 44:
         if text[0] == '\0': return ("", text)
         return ('u', text)
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)

 indexformatv0 = ">4l20s20s20s"
 # index ng:
 #  6 bytes offset
 #  2 bytes flags
 #  4 bytes compressed length
 #  4 bytes uncompressed length
 #  4 bytes: base rev
 #  4 bytes link rev
 #  4 bytes parent 1 rev
 #  4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 versionformat = ">i"

 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices

     By default we parse and load 1000 entries at a time.

     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog, indexformat):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.indexformat = indexformat
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog

     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map

         while i < end:
             if not self.index[i]:
                 d = self.data[i * self.s: (i + 1) * self.s]
                 e = struct.unpack(self.indexformat, d)
                 self.index[i] = e
                 self.map[e[-1]] = i
             i += 1

 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def __setitem__(self, pos, item):
         self.p.index[pos] = item
     def __delitem__(self, pos):
         del self.p.index[pos]
     def append(self, e):
         self.p.index.append(e)

 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0:
             raise KeyError(key)
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][-1]
             except:
                 self.p.load(i)
                 yield self.p.index[i][-1]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]

 class RevlogError(Exception): pass

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, includings its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile, defversion=0):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener

         self.indexstat = None
         self.cache = None
         self.chunkcache = None
         self.defversion = defversion
         self.load()

     def load(self):
         v = self.defversion
         try:
             f = self.opener(self.indexfile)
             i = f.read()
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""
         else:
             try:
                 st = os.fstat(f.fileno())
             except AttributeError, inst:
                 st = None
             else:
                 oldst = self.indexstat
                 if (oldst and st.st_dev == oldst.st_dev
                     and st.st_ino == oldst.st_ino
                     and st.st_mtime == oldst.st_mtime
                     and st.st_ctime == oldst.st_ctime):
                     return
                 self.indexstat = st
         if len(i) > 0:
             v = struct.unpack(versionformat, i[:4])[0]
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == 0:
             if flags:
                 raise RevlogError(_("index %s invalid flags %x for format v0" %
                                     (self.indexfile, flags)))
         elif fmt == REVLOGNG:
             if flags & ~REVLOGNGINLINEDATA:
                 raise RevlogError(_("index %s invalid flags %x for revlogng" %
                                     (self.indexfile, flags)))
         else:
             raise RevlogError(_("index %s invalid format %d" %
                                 (self.indexfile, fmt)))
         self.version = v
         if v == 0:
             self.indexformat = indexformatv0
         else:
             self.indexformat = indexformatng

         if i:
             if not self.inlinedata() and st and st.st_size > 10000:
                 # big index, let's parse it on demand
                 parser = lazyparser(i, self, self.indexformat)
                 self.index = lazyindex(parser)
                 self.nodemap = lazymap(parser)
             else:
                 self.parseindex(i)
             if self.inlinedata():
                 # we've already got the entire data file read in, save it
                 # in the chunk data
                 self.chunkcache = (0, i)
             if self.version != 0:
                 e = list(self.index[0])
                 type = self.ngtype(e[0])
                 e[0] = self.offset_type(0, type)
                 self.index[0] = e
         else:
             self.nodemap = { nullid: -1}
             self.index = []


     def parseindex(self, data):
         s = struct.calcsize(self.indexformat)
         l = len(data)
         self.index = []
         self.nodemap = {nullid: -1}
         inline = self.inlinedata()
         off = 0
         n = 0
         while off < l:
             e = struct.unpack(self.indexformat, data[off:off + s])
             self.index.append(e)
             self.nodemap[e[-1]] = n
             n += 1
             off += s
             if inline:
                 off += e[1]

     def ngoffset(self, q):
         if q & 0xFFFF:
             raise RevlogError(_('%s: incompatible revision flag %x') %
                               (self.indexfile, type))
         return long(q >> 16)

     def ngtype(self, q):
         return int(q & 0xFFFF)

     def offset_type(self, offset, type):
         return long(long(offset) << 16 | type)

     def loadindexmap(self):
         """loads both the map and the index from the lazy parser"""
         if isinstance(self.index, lazyindex):
             p = self.index.p
             p.load()

     def inlinedata(self): return self.version & REVLOGNGINLINEDATA
     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev):
         return (rev < 0) and nullid or self.index[rev][-1]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def linkrev(self, node): return self.index[self.rev(node)][-4]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         r = self.rev(node)
         d = self.index[r][-3:-1]
         if self.version == 0:
             return d
         return [ self.node(x) for x in d ]
     def start(self, rev):
         if rev < 0:
             return -1
         if self.version != 0:
             return self.ngoffset(self.index[rev][0])
         return self.index[rev][0]
     def end(self, rev): return self.start(rev) + self.length(rev)

     def length(self, rev):
         if rev < 0:
             return 0
         else:
             return self.index[rev][1]
     def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]

     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list bases and heads after all the unreachable ones have been
         pruned.  Element 0 contains a topologically sorted list of all

         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
            roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
            heads are considered to be their own ancestors).

         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = -1
         if (lowestrev == -1) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in xrange(0, self.count())],
                     [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = self.count() - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = {}
             # Start at the top and keep marking parents until we're done.
             nodestotag = heads[:]
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors[n] = 1 # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.extend([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.

             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > -1:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.

                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots?  Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = -1
                 roots = [nullid]
         # Transform our roots list into a 'set' (i.e. a dictionary where the
         # values don't matter.
         descendents = dict.fromkeys(roots, 1)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descedents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == -1: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked is descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.pop(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents.  (We seeded the dependents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents[n] = 1
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = roots.keys()
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)

     def heads(self, start=None):
         """return the list of all nodes that have no children

         if start is specified, only heads that are descendants of
         start will be returned

         """
         if start is None:
             start = nullid
         reachable = {start: 1}
         heads = {start: 1}
         startrev = self.rev(start)

         for r in xrange(startrev + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn in reachable:
                     reachable[n] = 1
                     heads[n] = 1
                 if pn in heads:
                     del heads[pn]
         return heads.keys()

     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c

     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
             if len(c) < 1: raise RevlogError(_("No match found"))
             return c[0]

         return None

     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)

     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)

     def chunk(self, rev, df=None, cachelen=4096):
         start, length = self.start(rev), self.length(rev)
         inline = self.inlinedata()
         if inline:
             start += (rev + 1) * struct.calcsize(self.indexformat)
         end = start + length
         def loadcache(df):
             cache_length = max(cachelen, length) # 4k
             if not df:
                 if inline:
                     df = self.opener(self.indexfile)
                 else:
                     df = self.opener(self.datafile)
             df.seek(start)
             self.chunkcache = (start, df.read(cache_length))

         if not self.chunkcache:
             loadcache(df)

         cache_start = self.chunkcache[0]
         cache_end = cache_start + len(self.chunkcache[1])
         if start >= cache_start and end <= cache_end:
             # it is cached
             offset = start - cache_start
         else:
             loadcache(df)
             offset = 0

         #def checkchunk():
         #    df = self.opener(self.datafile)
         #    df.seek(start)
         #    return df.read(length)
         #assert s == checkchunk()
         return decompress(self.chunkcache[1][offset:offset + length])

     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         return self.revdiff(r - 1, r)

     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
         b1 = self.base(rev1)
         b2 = self.base(rev2)
         if b1 == b2 and rev1 + 1 == rev2:
             return self.chunk(rev2)
         else:
             return self.diff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

     def revision(self, node):
         """return an uncompressed revision of a given"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]

         # look up what we need to read
         text = None
         rev = self.rev(node)
         base = self.base(rev)

         if self.inlinedata():
             # we probably have the whole chunk cached
             df = None
         else:
             df = self.opener(self.datafile)

         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             text = self.cache[2]
         else:
             text = self.chunk(base, df=df)

         bins = []
         for r in xrange(base + 1, rev + 1):
             bins.append(self.chunk(r, df=df))

         text = self.patches(text, bins)

         p1, p2 = self.parents(node)
         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))

         self.cache = (node, rev, text)
         return text

     def checkinlinesize(self, tr, fp=None):
         if not self.inlinedata():
             return
         if not fp:
             fp = self.opener(self.indexfile, 'r')
         size = fp.tell()
         if size < 131072:
             return
         tr.add(self.datafile, 0)
         df = self.opener(self.datafile, 'w')
         calc = struct.calcsize(self.indexformat)
         for r in xrange(self.count()):
             start = self.start(r) + (r + 1) * calc
             length = self.length(r)
             fp.seek(start)
             d = fp.read(length)
             df.write(d)
         fp.close()
         df.close()
         fp = self.opener(self.indexfile, 'w', atomictemp=True)
         self.version &= ~(REVLOGNGINLINEDATA)
         if self.count():
             x = self.index[0]
             e = struct.pack(self.indexformat, *x)[4:]
             l = struct.pack(versionformat, self.version)
             fp.write(l)
             fp.write(e)

         for i in xrange(1, self.count()):
             x = self.index[i]
             e = struct.pack(self.indexformat, *x)
             fp.write(e)

         # if we don't call rename, the temp file will never replace the
         # real index
         fp.rename()
         self.chunkcache = None

     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log

         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid

         node = hash(text, p1, p2)

         if node in self.nodemap:
             return node

         n = self.count()
         t = n - 1

         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, str(text))
             data = compress(d)
             l = len(data[1]) + len(data[0])
             dist = end - start + l

         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             l = len(data[1]) + len(data[0])
             base = n
         else:
             base = self.base(t)

         offset = 0
         if t >= 0:
             offset = self.end(t)

         if self.version == 0:
             e = (offset, l, base, link, p1, p2, node)
         else:
             e = (self.offset_type(offset, 0), l, len(text),
                  base, link, self.rev(p1), self.rev(p2), node)

         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(self.indexformat, *e)

         if not self.inlinedata():
             transaction.add(self.datafile, offset)
             transaction.add(self.indexfile, n * len(entry))
             f = self.opener(self.datafile, "a")
             if data[0]:
                 f.write(data[0])
             f.write(data[1])
             f = self.opener(self.indexfile, "a")
         else:
             f = self.opener(self.indexfile, "a+")
+            f.seek(0, 2)
             transaction.add(self.indexfile, f.tell())

         if len(self.index) == 1 and self.version != 0:
             l = struct.pack(versionformat, self.version)
             f.write(l)
             entry = entry[4:]

         f.write(entry)

         if self.inlinedata():
             f.write(data[0])
             f.write(data[1])
             self.checkinlinesize(transaction, f)

         self.cache = (node, n, text)
         return node

     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1

         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     yield (-d, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))

         def generations(node):
             sg, s = None, {}
             for g,n in ancestors(node):
                 if g != sg:
                     if sg:
                         yield sg, s
                     sg, s = g, {n:1}
                 else:
                     s[n] = 1
             yield sg, s

         x = generations(a)
         y = generations(b)
         gx = x.next()
         gy = y.next()

         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             #print "ancestor gen %s %s" % (gx[0], gy[0])
             if gx[0] == gy[0]:
                 # find the intersection
                 i = [ n for n in gx[1] if n in gy[1] ]
                 if i:
                     return i[0]
                 else:
                     #print "next"
                     gy = y.next()
                     gx = x.next()
             elif gx[0] < gy[0]:
                 #print "next y"
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()

     def group(self, nodelist, lookup, infocollect=None):
         """calculate a delta group

         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. the first delta is
         parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = [self.rev(n) for n in nodelist]

         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield changegroup.closechunk()
             return

         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))

         # build deltas
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             nb = self.node(b)

             if infocollect is not None:
                 infocollect(nb)

             d = self.revdiff(a, b)
             p = self.parents(nb)
             meta = nb + p[0] + p[1] + lookup(nb)
             yield changegroup.genchunk("%s%s" % (meta, d))

         yield changegroup.closechunk()

     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group

         given a set of deltas, add them to the revision log. the
         first delta is against its parent, which should be in our
         log, the rest are against the previous delta.
         """

         #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = None

         base = prev = -1
         start = end = measure = 0
         if r:
             end = self.end(t)

         ifh = self.opener(self.indexfile, "a+")
+        ifh.seek(0, 2)
         transaction.add(self.indexfile, ifh.tell())
         if self.inlinedata():
             dfh = None
         else:
             transaction.add(self.datafile, end)
             dfh = self.opener(self.datafile, "a")

         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 # if unique:
                 #    raise RevlogError(_("already have %s") % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]

             for p in (p1, p2):
                 if not p in self.nodemap:
                     raise RevlogError(_("unknown parent %s") % short(p1))

             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError(_("unknown base %s") % short(chain[:4]))

             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.

             if chain == prev:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]

             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 if dfh:
                     dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
                 measure = len(text)
             else:
                 if self.version == 0:
                     e = (end, len(cdelta), base, link, p1, p2, node)
                 else:
                     e = (self.offset_type(end, 0), len(cdelta), -1, base,
                          link, self.rev(p1), self.rev(p2), node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 if self.inlinedata():
                     ifh.write(struct.pack(self.indexformat, *e))
                     ifh.write(cdelta)
                     self.checkinlinesize(transaction, ifh)
                     if not self.inlinedata():
                         dfh = self.opener(self.datafile, "a")
                         ifh = self.opener(self.indexfile, "a")
                 else:
                     if not dfh:
                         # addrevision switched from inline to conventional
                         # reopen the index
                         dfh = self.opener(self.datafile, "a")
                         ifh = self.opener(self.indexfile, "a")
                     dfh.write(cdelta)
                     ifh.write(struct.pack(self.indexformat, *e))

             t, r, chain, prev = r, r + 1, node, node
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)

         if node is None:
             raise RevlogError(_("group to be added is empty"))
         return node

     def strip(self, rev, minlink):
         if self.count() == 0 or rev >= self.count():
             return

         if isinstance(self.index, lazyindex):
             self.loadindexmap()

         # When stripping away a revision, we need to make sure it
         # does not actually belong to an older changeset.
         # The minlink parameter defines the oldest revision
         # we're allowed to strip away.
         while minlink > self.index[rev][-4]:
             rev += 1
             if rev >= self.count():
                 return

         # first truncate the files on disk
         end = self.start(rev)
         if not self.inlinedata():
             df = self.opener(self.datafile, "a")
             df.truncate(end)
             end = rev * struct.calcsize(self.indexformat)
         else:
             end += rev * struct.calcsize(self.indexformat)

         indexf = self.opener(self.indexfile, "a")
         indexf.truncate(end)

         # then reset internal state in memory to forget those revisions
         self.cache = None
         self.chunkcache = None
         for x in xrange(rev, self.count()):
             del self.nodemap[self.node(x)]

         del self.index[rev:]

     def checksize(self):
         expected = 0
         if self.count():
             expected = self.end(self.count() - 1)

         try:
             f = self.opener(self.datafile)
             f.seek(0, 2)
             actual = f.tell()
             dd = actual - expected
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             dd = 0

         try:
             f = self.opener(self.indexfile)
             f.seek(0, 2)
             actual = f.tell()
             s = struct.calcsize(self.indexformat)
             i = actual / s
             di = actual - (i * s)
             if self.inlinedata():
                 databytes = 0
                 for r in xrange(self.count()):
                     databytes += self.length(r)
                 dd = 0
                 di = actual - self.count() * s - databytes
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             di = 0

         return (dd, di)

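For context on what those appends are writing: an inlined revlog keeps each revision's compressed data in the .i file immediately after its fixed-size index entry, which is why chunk() and checkinlinesize() above offset every data position by (rev + 1) index-entry widths. A small sketch of that offset arithmetic, using the revlogng entry format from the top of the file (illustration only, not code from the changeset):

import struct

indexformatng = ">Qiiiiii20s12x"   # v1 entry format quoted above

def inline_data_offset(start, rev):
    # Inline layout: entry 0, data 0, entry 1, data 1, ...
    # so revision rev's data follows rev + 1 fixed-size entries
    # in addition to all earlier revisions' data (start).
    return start + (rev + 1) * struct.calcsize(indexformatng)

print struct.calcsize(indexformatng)   # 64 bytes per entry
print inline_data_offset(0, 0)         # revision 0's data begins at byte 64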