upstream/mercurial-mirror Commit - r1711:8959700c

1

"""

1

"""

2

revlog.py - storage back-end for mercurial

2

revlog.py - storage back-end for mercurial

3

4

This provides efficient delta storage with O(1) retrieve and append

4

This provides efficient delta storage with O(1) retrieve and append

5

and O(changes) merge between branches

5

and O(changes) merge between branches

6

7

8

9

This software may be used and distributed according to the terms

9

This software may be used and distributed according to the terms

10

of the GNU General Public License, incorporated herein by reference.

10

of the GNU General Public License, incorporated herein by reference.

11

"""

11

"""

12

13

from node import *

13

from node import *

14

from i18n import gettext as _

14

from i18n import gettext as _

15

from demandload import demandload

15

from demandload import demandload

16

demandload(globals(), "binascii errno heapq mdiff sha struct zlib")

16

demandload(globals(), "binascii errno heapq mdiff sha struct zlib")

17

18

def hash(text, p1, p2):

18

def hash(text, p1, p2):

19

"""generate a hash from the given text and its parent hashes

19

"""generate a hash from the given text and its parent hashes

20

21

This hash combines both the current file contents and its history

21

This hash combines both the current file contents and its history

22

in a manner that makes it easy to distinguish nodes with the same

22

in a manner that makes it easy to distinguish nodes with the same

23

content in the revision graph.

23

content in the revision graph.

24

"""

24

"""

25

l = [p1, p2]

25

l = [p1, p2]

26

l.sort()

26

l.sort()

27

s = sha.new(l[0])

27

s = sha.new(l[0])

28

s.update(l[1])

28

s.update(l[1])

29

s.update(text)

29

s.update(text)

30

return s.digest()

30

return s.digest()

31

32

def compress(text):

32

def compress(text):

33

""" generate a possibly-compressed representation of text """

33

""" generate a possibly-compressed representation of text """

34

if not text: return ("", text)

34

if not text: return ("", text)

35

if len(text) < 44:

35

if len(text) < 44:

36

if text[0] == '\0': return ("", text)

36

if text[0] == '\0': return ("", text)

37

return ('u', text)

37

return ('u', text)

38

bin = zlib.compress(text)

38

bin = zlib.compress(text)

39

if len(bin) > len(text):

39

if len(bin) > len(text):

40

if text[0] == '\0': return ("", text)

40

if text[0] == '\0': return ("", text)

41

return ('u', text)

41

return ('u', text)

42

return ("", bin)

42

return ("", bin)

43

44

def decompress(bin):

44

def decompress(bin):

45

""" decompress the given input """

45

""" decompress the given input """

46

if not bin: return bin

46

if not bin: return bin

47

t = bin[0]

47

t = bin[0]

48

if t == '\0': return bin

48

if t == '\0': return bin

49

if t == 'x': return zlib.decompress(bin)

49

if t == 'x': return zlib.decompress(bin)

50

if t == 'u': return bin[1:]

50

if t == 'u': return bin[1:]

51

raise RevlogError(_("unknown compression type %s") % t)

51

raise RevlogError(_("unknown compression type %s") % t)

52

53

indexformat = ">4l20s20s20s"

53

indexformat = ">4l20s20s20s"

54

55

class lazyparser(object):

55

class lazyparser(object):

56

"""

56

"""

57

this class avoids the need to parse the entirety of large indices

57

this class avoids the need to parse the entirety of large indices

58

59

By default we parse and load 1000 entries at a time.

59

By default we parse and load 1000 entries at a time.

60

61

If no position is specified, we load the whole index, and replace

61

If no position is specified, we load the whole index, and replace

62

the lazy objects in revlog with the underlying objects for

62

the lazy objects in revlog with the underlying objects for

63

efficiency in cases where we look at most of the nodes.

63

efficiency in cases where we look at most of the nodes.

64

"""

64

"""

65

def __init__(self, data, revlog):

65

def __init__(self, data, revlog):

66

self.data = data

66

self.data = data

67

self.s = struct.calcsize(indexformat)

67

self.s = struct.calcsize(indexformat)

68

self.l = len(data)/self.s

68

self.l = len(data)/self.s

69

self.index = [None] * self.l

69

self.index = [None] * self.l

70

self.map = {nullid: -1}

70

self.map = {nullid: -1}

71

self.all = 0

71

self.all = 0

72

self.revlog = revlog

72

self.revlog = revlog

73

74

def trunc(self, pos):

74

def trunc(self, pos):

75

self.l = pos/self.s

75

self.l = pos/self.s

76

77

def load(self, pos=None):

77

def load(self, pos=None):

78

if self.all: return

78

if self.all: return

79

if pos is not None:

79

if pos is not None:

80

block = pos / 1000

80

block = pos / 1000

81

i = block * 1000

81

i = block * 1000

82

end = min(self.l, i + 1000)

82

end = min(self.l, i + 1000)

83

else:

83

else:

84

self.all = 1

84

self.all = 1

85

i = 0

85

i = 0

86

end = self.l

86

end = self.l

87

self.revlog.index = self.index

87

self.revlog.index = self.index

88

self.revlog.nodemap = self.map

88

self.revlog.nodemap = self.map

89

90

while i < end:

90

while i < end:

91

d = self.data[i * self.s: (i + 1) * self.s]

91

d = self.data[i * self.s: (i + 1) * self.s]

92

e = struct.unpack(indexformat, d)

92

e = struct.unpack(indexformat, d)

93

self.index[i] = e

93

self.index[i] = e

94

self.map[e[6]] = i

94

self.map[e[6]] = i

95

i += 1

95

i += 1

96

97

class lazyindex(object):

97

class lazyindex(object):

98

"""a lazy version of the index array"""

98

"""a lazy version of the index array"""

99

def __init__(self, parser):

99

def __init__(self, parser):

100

self.p = parser

100

self.p = parser

101

def __len__(self):

101

def __len__(self):

102

return len(self.p.index)

102

return len(self.p.index)

103

def load(self, pos):

103

def load(self, pos):

104

if pos < 0:

104

if pos < 0:

105

pos += len(self.p.index)

105

pos += len(self.p.index)

106

self.p.load(pos)

106

self.p.load(pos)

107

return self.p.index[pos]

107

return self.p.index[pos]

108

def __getitem__(self, pos):

108

def __getitem__(self, pos):

109

return self.p.index[pos] or self.load(pos)

109

return self.p.index[pos] or self.load(pos)

110

def __delitem__(self, pos):

110

def __delitem__(self, pos):

111

del self.p.index[pos]

111

del self.p.index[pos]

112

def append(self, e):

112

def append(self, e):

113

self.p.index.append(e)

113

self.p.index.append(e)

114

def trunc(self, pos):

114

def trunc(self, pos):

115

self.p.trunc(pos)

115

self.p.trunc(pos)

116

117

class lazymap(object):

117

class lazymap(object):

118

"""a lazy version of the node map"""

118

"""a lazy version of the node map"""

119

def __init__(self, parser):

119

def __init__(self, parser):

120

self.p = parser

120

self.p = parser

121

def load(self, key):

121

def load(self, key):

122

if self.p.all: return

122

if self.p.all: return

123

n = self.p.data.find(key)

123

n = self.p.data.find(key)

124

if n < 0:

124

if n < 0:

125

raise KeyError(key)

125

raise KeyError(key)

126

pos = n / self.p.s

126

pos = n / self.p.s

127

self.p.load(pos)

127

self.p.load(pos)

128

def __contains__(self, key):

128

def __contains__(self, key):

129

self.p.load()

129

self.p.load()

130

return key in self.p.map

130

return key in self.p.map

131

def __iter__(self):

131

def __iter__(self):

132

yield nullid

132

yield nullid

133

for i in xrange(self.p.l):

133

for i in xrange(self.p.l):

134

try:

134

try:

135

yield self.p.index[i][6]

135

yield self.p.index[i][6]

136

except:

136

except:

137

self.p.load(i)

137

self.p.load(i)

138

yield self.p.index[i][6]

138

yield self.p.index[i][6]

139

def __getitem__(self, key):

139

def __getitem__(self, key):

140

try:

140

try:

141

return self.p.map[key]

141

return self.p.map[key]

142

except KeyError:

142

except KeyError:

143

try:

143

try:

144

self.load(key)

144

self.load(key)

145

return self.p.map[key]

145

return self.p.map[key]

146

except KeyError:

146

except KeyError:

147

raise KeyError("node " + hex(key))

147

raise KeyError("node " + hex(key))

148

def __setitem__(self, key, val):

148

def __setitem__(self, key, val):

149

self.p.map[key] = val

149

self.p.map[key] = val

150

def __delitem__(self, key):

150

def __delitem__(self, key):

151

del self.p.map[key]

151

del self.p.map[key]

152

153

class RevlogError(Exception): pass

153

class RevlogError(Exception): pass

154

155

class revlog(object):

155

class revlog(object):

156

"""

156

"""

157

the underlying revision storage object

157

the underlying revision storage object

158

159

A revlog consists of two parts, an index and the revision data.

159

A revlog consists of two parts, an index and the revision data.

160

161

The index is a file with a fixed record size containing

161

The index is a file with a fixed record size containing

162

information on each revision, includings its nodeid (hash), the

162

information on each revision, includings its nodeid (hash), the

163

nodeids of its parents, the position and offset of its data within

163

nodeids of its parents, the position and offset of its data within

164

the data file, and the revision it's based on. Finally, each entry

164

the data file, and the revision it's based on. Finally, each entry

165

contains a linkrev entry that can serve as a pointer to external

165

contains a linkrev entry that can serve as a pointer to external

166

data.

166

data.

167

168

The revision data itself is a linear collection of data chunks.

168

The revision data itself is a linear collection of data chunks.

169

Each chunk represents a revision and is usually represented as a

169

Each chunk represents a revision and is usually represented as a

170

delta against the previous chunk. To bound lookup time, runs of

170

delta against the previous chunk. To bound lookup time, runs of

171

deltas are limited to about 2 times the length of the original

171

deltas are limited to about 2 times the length of the original

172

version data. This makes retrieval of a version proportional to

172

version data. This makes retrieval of a version proportional to

173

its size, or O(1) relative to the number of revisions.

173

its size, or O(1) relative to the number of revisions.

174

175

Both pieces of the revlog are written to in an append-only

175

Both pieces of the revlog are written to in an append-only

176

fashion, which means we never need to rewrite a file to insert or

176

fashion, which means we never need to rewrite a file to insert or

177

remove data, and can use some simple techniques to avoid the need

177

remove data, and can use some simple techniques to avoid the need

178

for locking while reading.

178

for locking while reading.

179

"""

179

"""

180

def __init__(self, opener, indexfile, datafile):

180

def __init__(self, opener, indexfile, datafile):

181

"""

181

"""

182

create a revlog object

182

create a revlog object

183

184

opener is a function that abstracts the file opening operation

184

opener is a function that abstracts the file opening operation

185

and can be used to implement COW semantics or the like.

185

and can be used to implement COW semantics or the like.

186

"""

186

"""

187

self.indexfile = indexfile

187

self.indexfile = indexfile

188

self.datafile = datafile

188

self.datafile = datafile

189

self.opener = opener

189

self.opener = opener

190

self.cache = None

190

self.cache = None

191

self.chunkcache = None

191

self.chunkcache = None

192

193

try:

193

try:

194

i = self.opener(self.indexfile).read()

194

i = self.opener(self.indexfile).read()

195

except IOError, inst:

195

except IOError, inst:

196

if inst.errno != errno.ENOENT:

196

if inst.errno != errno.ENOENT:

197

raise

197

raise

198

i = ""

198

i = ""

199

200

if i and i[:4] != "\0\0\0\0":

200

if i and i[:4] != "\0\0\0\0":

201

raise RevlogError(_("incompatible revlog signature on %s") %

201

raise RevlogError(_("incompatible revlog signature on %s") %

202

self.indexfile)

202

self.indexfile)

203

204

if len(i) > 10000:

204

if len(i) > 10000:

205

# big index, let's parse it on demand

205

# big index, let's parse it on demand

206

parser = lazyparser(i, self)

206

parser = lazyparser(i, self)

207

self.index = lazyindex(parser)

207

self.index = lazyindex(parser)

208

self.nodemap = lazymap(parser)

208

self.nodemap = lazymap(parser)

209

else:

209

else:

210

s = struct.calcsize(indexformat)

210

s = struct.calcsize(indexformat)

211

l = len(i) / s

211

l = len(i) / s

212

self.index = [None] * l

212

self.index = [None] * l

213

m = [None] * l

213

m = [None] * l

214

215

n = 0

215

n = 0

216

for f in xrange(0, l * s, s):

216

for f in xrange(0, l * s, s):

217

# offset, size, base, linkrev, p1, p2, nodeid

217

# offset, size, base, linkrev, p1, p2, nodeid

218

e = struct.unpack(indexformat, i[f:f + s])

218

e = struct.unpack(indexformat, i[f:f + s])

219

m[n] = (e[6], n)

219

m[n] = (e[6], n)

220

self.index[n] = e

220

self.index[n] = e

221

n += 1

221

n += 1

222

223

self.nodemap = dict(m)

223

self.nodemap = dict(m)

224

self.nodemap[nullid] = -1

224

self.nodemap[nullid] = -1

225

226

def tip(self): return self.node(len(self.index) - 1)

226

def tip(self): return self.node(len(self.index) - 1)

227

def count(self): return len(self.index)

227

def count(self): return len(self.index)

228

def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]

228

def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]

229

def rev(self, node):

229

def rev(self, node):

230

try:

230

try:

231

return self.nodemap[node]

231

return self.nodemap[node]

232

except KeyError:

232

except KeyError:

233

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

233

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

234

def linkrev(self, node): return self.index[self.rev(node)][3]

234

def linkrev(self, node): return self.index[self.rev(node)][3]

235

def parents(self, node):

235

def parents(self, node):

236

if node == nullid: return (nullid, nullid)

236

if node == nullid: return (nullid, nullid)

237

return self.index[self.rev(node)][4:6]

237

return self.index[self.rev(node)][4:6]

238

239

def start(self, rev): return self.index[rev][0]

239

def start(self, rev): return self.index[rev][0]

240

def length(self, rev): return self.index[rev][1]

240

def length(self, rev): return self.index[rev][1]

241

def end(self, rev): return self.start(rev) + self.length(rev)

241

def end(self, rev): return self.start(rev) + self.length(rev)

242

def base(self, rev): return self.index[rev][2]

242

def base(self, rev): return self.index[rev][2]

243

244

def reachable(self, rev, stop=None):

244

def reachable(self, rev, stop=None):

245

reachable = {}

245

reachable = {}

246

visit = [rev]

246

visit = [rev]

247

reachable[rev] = 1

247

reachable[rev] = 1

248

if stop:

248

if stop:

249

stopn = self.rev(stop)

249

stopn = self.rev(stop)

250

else:

250

else:

251

stopn = 0

251

stopn = 0

252

while visit:

252

while visit:

253

n = visit.pop(0)

253

n = visit.pop(0)

254

if n == stop:

254

if n == stop:

255

continue

255

continue

256

if n == nullid:

256

if n == nullid:

257

continue

257

continue

258

for p in self.parents(n):

258

for p in self.parents(n):

259

if self.rev(p) < stopn:

259

if self.rev(p) < stopn:

260

continue

260

continue

261

if p not in reachable:

261

if p not in reachable:

262

reachable[p] = 1

262

reachable[p] = 1

263

visit.append(p)

263

visit.append(p)

264

return reachable

264

return reachable

265

266

def nodesbetween(self, roots=None, heads=None):

266

def nodesbetween(self, roots=None, heads=None):

267

"""Return a tuple containing three elements. Elements 1 and 2 contain

267

"""Return a tuple containing three elements. Elements 1 and 2 contain

268

a final list bases and heads after all the unreachable ones have been

268

a final list bases and heads after all the unreachable ones have been

269

pruned. Element 0 contains a topologically sorted list of all

269

pruned. Element 0 contains a topologically sorted list of all

270

271

nodes that satisfy these constraints:

271

nodes that satisfy these constraints:

272

1. All nodes must be descended from a node in roots (the nodes on

272

1. All nodes must be descended from a node in roots (the nodes on

273

roots are considered descended from themselves).

273

roots are considered descended from themselves).

274

2. All nodes must also be ancestors of a node in heads (the nodes in

274

2. All nodes must also be ancestors of a node in heads (the nodes in

275

heads are considered to be their own ancestors).

275

heads are considered to be their own ancestors).

276

277

If roots is unspecified, nullid is assumed as the only root.

277

If roots is unspecified, nullid is assumed as the only root.

278

If heads is unspecified, it is taken to be the output of the

278

If heads is unspecified, it is taken to be the output of the

279

heads method (i.e. a list of all nodes in the repository that

279

heads method (i.e. a list of all nodes in the repository that

280

have no children)."""

280

have no children)."""

281

nonodes = ([], [], [])

281

nonodes = ([], [], [])

282

if roots is not None:

282

if roots is not None:

283

roots = list(roots)

283

roots = list(roots)

284

if not roots:

284

if not roots:

285

return nonodes

285

return nonodes

286

lowestrev = min([self.rev(n) for n in roots])

286

lowestrev = min([self.rev(n) for n in roots])

287

else:

287

else:

288

roots = [nullid] # Everybody's a descendent of nullid

288

roots = [nullid] # Everybody's a descendent of nullid

289

lowestrev = -1

289

lowestrev = -1

290

if (lowestrev == -1) and (heads is None):

290

if (lowestrev == -1) and (heads is None):

291

# We want _all_ the nodes!

291

# We want _all_ the nodes!

292

return ([self.node(r) for r in xrange(0, self.count())],

292

return ([self.node(r) for r in xrange(0, self.count())],

293

[nullid], list(self.heads()))

293

[nullid], list(self.heads()))

294

if heads is None:

294

if heads is None:

295

# All nodes are ancestors, so the latest ancestor is the last

295

# All nodes are ancestors, so the latest ancestor is the last

296

# node.

296

# node.

297

highestrev = self.count() - 1

297

highestrev = self.count() - 1

298

# Set ancestors to None to signal that every node is an ancestor.

298

# Set ancestors to None to signal that every node is an ancestor.

299

ancestors = None

299

ancestors = None

300

# Set heads to an empty dictionary for later discovery of heads

300

# Set heads to an empty dictionary for later discovery of heads

301

heads = {}

301

heads = {}

302

else:

302

else:

303

heads = list(heads)

303

heads = list(heads)

304

if not heads:

304

if not heads:

305

return nonodes

305

return nonodes

306

ancestors = {}

306

ancestors = {}

307

# Start at the top and keep marking parents until we're done.

307

# Start at the top and keep marking parents until we're done.

308

nodestotag = heads[:]

308

nodestotag = heads[:]

309

# Turn heads into a dictionary so we can remove 'fake' heads.

309

# Turn heads into a dictionary so we can remove 'fake' heads.

310

# Also, later we will be using it to filter out the heads we can't

310

# Also, later we will be using it to filter out the heads we can't

311

# find from roots.

311

# find from roots.

312

heads = dict.fromkeys(heads, 0)

312

heads = dict.fromkeys(heads, 0)

313

# Remember where the top was so we can use it as a limit later.

313

# Remember where the top was so we can use it as a limit later.

314

highestrev = max([self.rev(n) for n in nodestotag])

314

highestrev = max([self.rev(n) for n in nodestotag])

315

while nodestotag:

315

while nodestotag:

316

# grab a node to tag

316

# grab a node to tag

317

n = nodestotag.pop()

317

n = nodestotag.pop()

318

# Never tag nullid

318

# Never tag nullid

319

if n == nullid:

319

if n == nullid:

320

continue

320

continue

321

# A node's revision number represents its place in a

321

# A node's revision number represents its place in a

322

# topologically sorted list of nodes.

322

# topologically sorted list of nodes.

323

r = self.rev(n)

323

r = self.rev(n)

324

if r >= lowestrev:

324

if r >= lowestrev:

325

if n not in ancestors:

325

if n not in ancestors:

326

# If we are possibly a descendent of one of the roots

326

# If we are possibly a descendent of one of the roots

327

# and we haven't already been marked as an ancestor

327

# and we haven't already been marked as an ancestor

328

ancestors[n] = 1 # Mark as ancestor

328

ancestors[n] = 1 # Mark as ancestor

329

# Add non-nullid parents to list of nodes to tag.

329

# Add non-nullid parents to list of nodes to tag.

330

nodestotag.extend([p for p in self.parents(n) if

330

nodestotag.extend([p for p in self.parents(n) if

331

p != nullid])

331

p != nullid])

332

elif n in heads: # We've seen it before, is it a fake head?

332

elif n in heads: # We've seen it before, is it a fake head?

333

# So it is, real heads should not be the ancestors of

333

# So it is, real heads should not be the ancestors of

334

# any other heads.

334

# any other heads.

335

heads.pop(n)

335

heads.pop(n)

336

if not ancestors:

336

if not ancestors:

337

return nonodes

337

return nonodes

338

# Now that we have our set of ancestors, we want to remove any

338

# Now that we have our set of ancestors, we want to remove any

339

# roots that are not ancestors.

339

# roots that are not ancestors.

340

341

# If one of the roots was nullid, everything is included anyway.

341

# If one of the roots was nullid, everything is included anyway.

342

if lowestrev > -1:

342

if lowestrev > -1:

343

# But, since we weren't, let's recompute the lowest rev to not

343

# But, since we weren't, let's recompute the lowest rev to not

344

# include roots that aren't ancestors.

344

# include roots that aren't ancestors.

345

346

# Filter out roots that aren't ancestors of heads

346

# Filter out roots that aren't ancestors of heads

347

roots = [n for n in roots if n in ancestors]

347

roots = [n for n in roots if n in ancestors]

348

# Recompute the lowest revision

348

# Recompute the lowest revision

349

if roots:

349

if roots:

350

lowestrev = min([self.rev(n) for n in roots])

350

lowestrev = min([self.rev(n) for n in roots])

351

else:

351

else:

352

# No more roots? Return empty list

352

# No more roots? Return empty list

353

return nonodes

353

return nonodes

354

else:

354

else:

355

# We are descending from nullid, and don't need to care about

355

# We are descending from nullid, and don't need to care about

356

# any other roots.

356

# any other roots.

357

lowestrev = -1

357

lowestrev = -1

358

roots = [nullid]

358

roots = [nullid]

359

# Transform our roots list into a 'set' (i.e. a dictionary where the

359

# Transform our roots list into a 'set' (i.e. a dictionary where the

360

# values don't matter.

360

# values don't matter.

361

descendents = dict.fromkeys(roots, 1)

361

descendents = dict.fromkeys(roots, 1)

362

# Also, keep the original roots so we can filter out roots that aren't

362

# Also, keep the original roots so we can filter out roots that aren't

363

# 'real' roots (i.e. are descended from other roots).

363

# 'real' roots (i.e. are descended from other roots).

364

roots = descendents.copy()

364

roots = descendents.copy()

365

# Our topologically sorted list of output nodes.

365

# Our topologically sorted list of output nodes.

366

orderedout = []

366

orderedout = []

367

# Don't start at nullid since we don't want nullid in our output list,

367

# Don't start at nullid since we don't want nullid in our output list,

368

# and if nullid shows up in descedents, empty parents will look like

368

# and if nullid shows up in descedents, empty parents will look like

369

# they're descendents.

369

# they're descendents.

370

for r in xrange(max(lowestrev, 0), highestrev + 1):

370

for r in xrange(max(lowestrev, 0), highestrev + 1):

371

n = self.node(r)

371

n = self.node(r)

372

isdescendent = False

372

isdescendent = False

373

if lowestrev == -1: # Everybody is a descendent of nullid

373

if lowestrev == -1: # Everybody is a descendent of nullid

374

isdescendent = True

374

isdescendent = True

375

elif n in descendents:

375

elif n in descendents:

376

# n is already a descendent

376

# n is already a descendent

377

isdescendent = True

377

isdescendent = True

378

# This check only needs to be done here because all the roots

378

# This check only needs to be done here because all the roots

379

# will start being marked is descendents before the loop.

379

# will start being marked is descendents before the loop.

380

if n in roots:

380

if n in roots:

381

# If n was a root, check if it's a 'real' root.

381

# If n was a root, check if it's a 'real' root.

382

p = tuple(self.parents(n))

382

p = tuple(self.parents(n))

383

# If any of its parents are descendents, it's not a root.

383

# If any of its parents are descendents, it's not a root.

384

if (p[0] in descendents) or (p[1] in descendents):

384

if (p[0] in descendents) or (p[1] in descendents):

385

roots.pop(n)

385

roots.pop(n)

386

else:

386

else:

387

p = tuple(self.parents(n))

387

p = tuple(self.parents(n))

388

# A node is a descendent if either of its parents are

388

# A node is a descendent if either of its parents are

389

# descendents. (We seeded the dependents list with the roots

389

# descendents. (We seeded the dependents list with the roots

390

# up there, remember?)

390

# up there, remember?)

391

if (p[0] in descendents) or (p[1] in descendents):

391

if (p[0] in descendents) or (p[1] in descendents):

392

descendents[n] = 1

392

descendents[n] = 1

393

isdescendent = True

393

isdescendent = True

394

if isdescendent and ((ancestors is None) or (n in ancestors)):

394

if isdescendent and ((ancestors is None) or (n in ancestors)):

395

# Only include nodes that are both descendents and ancestors.

395

# Only include nodes that are both descendents and ancestors.

396

orderedout.append(n)

396

orderedout.append(n)

397

if (ancestors is not None) and (n in heads):

397

if (ancestors is not None) and (n in heads):

398

# We're trying to figure out which heads are reachable

398

# We're trying to figure out which heads are reachable

399

# from roots.

399

# from roots.

400

# Mark this head as having been reached

400

# Mark this head as having been reached

401

heads[n] = 1

401

heads[n] = 1

402

elif ancestors is None:

402

elif ancestors is None:

403

# Otherwise, we're trying to discover the heads.

403

# Otherwise, we're trying to discover the heads.

404

# Assume this is a head because if it isn't, the next step

404

# Assume this is a head because if it isn't, the next step

405

# will eventually remove it.

405

# will eventually remove it.

406

heads[n] = 1

406

heads[n] = 1

407

# But, obviously its parents aren't.

407

# But, obviously its parents aren't.

408

for p in self.parents(n):

408

for p in self.parents(n):

409

heads.pop(p, None)

409

heads.pop(p, None)

410

heads = [n for n in heads.iterkeys() if heads[n] != 0]

410

heads = [n for n in heads.iterkeys() if heads[n] != 0]

411

roots = roots.keys()

411

roots = roots.keys()

412

assert orderedout

412

assert orderedout

413

assert roots

413

assert roots

414

assert heads

414

assert heads

415

return (orderedout, roots, heads)

415

return (orderedout, roots, heads)

416

417

def heads(self, start=None):

417

def heads(self, start=None):

418

"""return the list of all nodes that have no children

418

"""return the list of all nodes that have no children

419

420

if start is specified, only heads that are descendants of

420

if start is specified, only heads that are descendants of

421

start will be returned

421

start will be returned

422

423

"""

423

"""

424

if start is None:

424

if start is None:

425

start = nullid

425

start = nullid

426

reachable = {start: 1}

426

reachable = {start: 1}

427

heads = {start: 1}

427

heads = {start: 1}

428

startrev = self.rev(start)

428

startrev = self.rev(start)

429

430

for r in xrange(startrev + 1, self.count()):

430

for r in xrange(startrev + 1, self.count()):

431

n = self.node(r)

431

n = self.node(r)

432

for pn in self.parents(n):

432

for pn in self.parents(n):

433

if pn in reachable:

433

if pn in reachable:

434

reachable[n] = 1

434

reachable[n] = 1

435

heads[n] = 1

435

heads[n] = 1

436

if pn in heads:

436

if pn in heads:

437

del heads[pn]

437

del heads[pn]

438

return heads.keys()

438

return heads.keys()

439

440

def children(self, node):

440

def children(self, node):

441

"""find the children of a given node"""

441

"""find the children of a given node"""

442

c = []

442

c = []

443

p = self.rev(node)

443

p = self.rev(node)

444

for r in range(p + 1, self.count()):

444

for r in range(p + 1, self.count()):

445

n = self.node(r)

445

n = self.node(r)

446

for pn in self.parents(n):

446

for pn in self.parents(n):

447

if pn == node:

447

if pn == node:

448

c.append(n)

448

c.append(n)

449

continue

449

continue

450

elif pn == nullid:

450

elif pn == nullid:

451

continue

451

continue

452

return c

452

return c

453

454

def lookup(self, id):

454

def lookup(self, id):

455

"""locate a node based on revision number or subset of hex nodeid"""

455

"""locate a node based on revision number or subset of hex nodeid"""

456

try:

456

try:

457

rev = int(id)

457

rev = int(id)

458

if str(rev) != id: raise ValueError

458

if str(rev) != id: raise ValueError

459

if rev < 0: rev = self.count() + rev

459

if rev < 0: rev = self.count() + rev

460

if rev < 0 or rev >= self.count(): raise ValueError

460

if rev < 0 or rev >= self.count(): raise ValueError

461

return self.node(rev)

461

return self.node(rev)

462

except (ValueError, OverflowError):

462

except (ValueError, OverflowError):

463

c = []

463

c = []

464

for n in self.nodemap:

464

for n in self.nodemap:

465

if hex(n).startswith(id):

465

if hex(n).startswith(id):

466

c.append(n)

466

c.append(n)

467

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

467

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

468

if len(c) < 1: raise RevlogError(_("No match found"))

468

if len(c) < 1: raise RevlogError(_("No match found"))

469

return c[0]

469

return c[0]

470

471

return None

471

return None

472

473

def diff(self, a, b):

473

def diff(self, a, b):

474

"""return a delta between two revisions"""

474

"""return a delta between two revisions"""

475

return mdiff.textdiff(a, b)

475

return mdiff.textdiff(a, b)

476

477

def patches(self, t, pl):

477

def patches(self, t, pl):

478

"""apply a list of patches to a string"""

478

"""apply a list of patches to a string"""

479

return mdiff.patches(t, pl)

479

return mdiff.patches(t, pl)

480

481

def chunk(self, rev):

481

def chunk(self, rev):

482

start, length = self.start(rev), self.length(rev)

482

start, length = self.start(rev), self.length(rev)

483

end = start + length

483

end = start + length

484

485

def loadcache():

485

def loadcache():

486

cache_length = max(4096 * 1024, length) # 4Mo

486

cache_length = max(4096 * 1024, length) # 4Mo

487

df = self.opener(self.datafile)

487

df = self.opener(self.datafile)

488

df.seek(start)

488

df.seek(start)

489

self.chunkcache = (start, df.read(cache_length))

489

self.chunkcache = (start, df.read(cache_length))

490

491

if not self.chunkcache:

491

if not self.chunkcache:

492

loadcache()

492

loadcache()

493

494

cache_start = self.chunkcache[0]

494

cache_start = self.chunkcache[0]

495

cache_end = cache_start + len(self.chunkcache[1])

495

cache_end = cache_start + len(self.chunkcache[1])

496

if start >= cache_start and end <= cache_end:

496

if start >= cache_start and end <= cache_end:

497

# it is cached

497

# it is cached

498

offset = start - cache_start

498

offset = start - cache_start

499

else:

499

else:

500

loadcache()

500

loadcache()

501

offset = 0

501

offset = 0

502

503

#def checkchunk():

503

#def checkchunk():

504

# df = self.opener(self.datafile)

504

# df = self.opener(self.datafile)

505

# df.seek(start)

505

# df.seek(start)

506

# return df.read(length)

506

# return df.read(length)

507

#assert s == checkchunk()

507

#assert s == checkchunk()

508

return decompress(self.chunkcache[1][offset:offset + length])

508

return decompress(self.chunkcache[1][offset:offset + length])

509

510

def delta(self, node):

510

def delta(self, node):

511

"""return or calculate a delta between a node and its predecessor"""

511

"""return or calculate a delta between a node and its predecessor"""

512

r = self.rev(node)

512

r = self.rev(node)

513

b = self.base(r)

513

b = self.base(r)

514

if r == b:

514

if r == b:

515

return self.diff(self.revision(self.node(r - 1)),

515

return self.diff(self.revision(self.node(r - 1)),

516

self.revision(node))

516

self.revision(node))

517

else:

517

else:

518

return self.chunk(r)

518

return self.chunk(r)

519

520

def revision(self, node):

520

def revision(self, node):

521

"""return an uncompressed revision of a given"""

521

"""return an uncompressed revision of a given"""

522

if node == nullid: return ""

522

if node == nullid: return ""

523

if self.cache and self.cache[0] == node: return self.cache[2]

523

if self.cache and self.cache[0] == node: return self.cache[2]

524

525

# look up what we need to read

525

# look up what we need to read

526

text = None

526

text = None

527

rev = self.rev(node)

527

rev = self.rev(node)

528

base = self.base(rev)

528

base = self.base(rev)

529

530

# do we have useful data cached?

530

# do we have useful data cached?

531

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

531

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

532

base = self.cache[1]

532

base = self.cache[1]

533

text = self.cache[2]

533

text = self.cache[2]

534

else:

534

else:

535

text = self.chunk(base)

535

text = self.chunk(base)

536

537

bins = []

537

bins = []

538

for r in xrange(base + 1, rev + 1):

538

for r in xrange(base + 1, rev + 1):

539

bins.append(self.chunk(r))

539

bins.append(self.chunk(r))

540

541

text = mdiff.patches(text, bins)

541

text = mdiff.patches(text, bins)

542

543

p1, p2 = self.parents(node)

543

p1, p2 = self.parents(node)

544

if node != hash(text, p1, p2):

544

if node != hash(text, p1, p2):

545

raise RevlogError(_("integrity check failed on %s:%d")

545

raise RevlogError(_("integrity check failed on %s:%d")

546

% (self.datafile, rev))

546

% (self.datafile, rev))

547

548

self.cache = (node, rev, text)

548

self.cache = (node, rev, text)

549

return text

549

return text

550

551

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

551

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

552

"""add a revision to the log

552

"""add a revision to the log

553

554

text - the revision data to add

554

text - the revision data to add

555

transaction - the transaction object used for rollback

555

transaction - the transaction object used for rollback

556

link - the linkrev data to add

556

link - the linkrev data to add

557

p1, p2 - the parent nodeids of the revision

557

p1, p2 - the parent nodeids of the revision

558

d - an optional precomputed delta

558

d - an optional precomputed delta

559

"""

559

"""

560

if text is None: text = ""

560

if text is None: text = ""

561

if p1 is None: p1 = self.tip()

561

if p1 is None: p1 = self.tip()

562

if p2 is None: p2 = nullid

562

if p2 is None: p2 = nullid

563

564

node = hash(text, p1, p2)

564

node = hash(text, p1, p2)

565

566

if node in self.nodemap:

566

if node in self.nodemap:

567

return node

567

return node

568

569

n = self.count()

569

n = self.count()

570

t = n - 1

570

t = n - 1

571

572

if n:

572

if n:

573

base = self.base(t)

573

base = self.base(t)

574

start = self.start(base)

574

start = self.start(base)

575

end = self.end(t)

575

end = self.end(t)

576

if not d:

576

if not d:

577

prev = self.revision(self.tip())

577

prev = self.revision(self.tip())

578

d = self.diff(prev, str(text))

578

d = self.diff(prev, str(text))

579

data = compress(d)

579

data = compress(d)

580

l = len(data[1]) + len(data[0])

580

l = len(data[1]) + len(data[0])

581

dist = end - start + l

581

dist = end - start + l

582

583

# full versions are inserted when the needed deltas

583

# full versions are inserted when the needed deltas

584

# become comparable to the uncompressed text

584

# become comparable to the uncompressed text

585

if not n or dist > len(text) * 2:

585

if not n or dist > len(text) * 2:

586

data = compress(text)

586

data = compress(text)

587

l = len(data[1]) + len(data[0])

587

l = len(data[1]) + len(data[0])

588

base = n

588

base = n

589

else:

589

else:

590

base = self.base(t)

590

base = self.base(t)

591

592

offset = 0

592

offset = 0

593

if t >= 0:

593

if t >= 0:

594

offset = self.end(t)

594

offset = self.end(t)

595

596

e = (offset, l, base, link, p1, p2, node)

596

e = (offset, l, base, link, p1, p2, node)

597

598

self.index.append(e)

598

self.index.append(e)

599

self.nodemap[node] = n

599

self.nodemap[node] = n

600

entry = struct.pack(indexformat, *e)

600

entry = struct.pack(indexformat, *e)

601

602

transaction.add(self.datafile, e[0])

602

transaction.add(self.datafile, e[0])

603

f = self.opener(self.datafile, "a")

603

f = self.opener(self.datafile, "a")

604

if data[0]:

604

if data[0]:

605

f.write(data[0])

605

f.write(data[0])

606

f.write(data[1])

606

f.write(data[1])

607

transaction.add(self.indexfile, n * len(entry))

607

transaction.add(self.indexfile, n * len(entry))

608

self.opener(self.indexfile, "a").write(entry)

608

self.opener(self.indexfile, "a").write(entry)

609

610

self.cache = (node, n, text)

610

self.cache = (node, n, text)

611

return node

611

return node

612

613

def ancestor(self, a, b):

613

def ancestor(self, a, b):

614

"""calculate the least common ancestor of nodes a and b"""

614

"""calculate the least common ancestor of nodes a and b"""

615

# calculate the distance of every node from root

615

# calculate the distance of every node from root

616

dist = {nullid: 0}

616

dist = {nullid: 0}

617

for i in xrange(self.count()):

617

for i in xrange(self.count()):

618

n = self.node(i)

618

n = self.node(i)

619

p1, p2 = self.parents(n)

619

p1, p2 = self.parents(n)

620

dist[n] = max(dist[p1], dist[p2]) + 1

620

dist[n] = max(dist[p1], dist[p2]) + 1

621

622

# traverse ancestors in order of decreasing distance from root

622

# traverse ancestors in order of decreasing distance from root

623

def ancestors(node):

623

def ancestors(node):

624

# we store negative distances because heap returns smallest member

624

# we store negative distances because heap returns smallest member

625

h = [(-dist[node], node)]

625

h = [(-dist[node], node)]

626

seen = {}

626

seen = {}

627

earliest = self.count()

627

earliest = self.count()

628

while h:

628

while h:

629

d, n = heapq.heappop(h)

629

d, n = heapq.heappop(h)

630

if n not in seen:

630

if n not in seen:

631

seen[n] = 1

631

seen[n] = 1

632

r = self.rev(n)

632

r = self.rev(n)

633

yield (-d, n)

633

yield (-d, n)

634

for p in self.parents(n):

634

for p in self.parents(n):

635

heapq.heappush(h, (-dist[p], p))

635

heapq.heappush(h, (-dist[p], p))

636

637

def generations(node):

637

def generations(node):

638

sg, s = None, {}

638

sg, s = None, {}

639

for g,n in ancestors(node):

639

for g,n in ancestors(node):

640

if g != sg:

640

if g != sg:

641

if sg:

641

if sg:

642

yield sg, s

642

yield sg, s

643

sg, s = g, {n:1}

643

sg, s = g, {n:1}

644

else:

644

else:

645

s[n] = 1

645

s[n] = 1

646

yield sg, s

646

yield sg, s

647

648

x = generations(a)

648

x = generations(a)

649

y = generations(b)

649

y = generations(b)

650

gx = x.next()

650

gx = x.next()

651

gy = y.next()

651

gy = y.next()

652

653

# increment each ancestor list until it is closer to root than

653

# increment each ancestor list until it is closer to root than

654

# the other, or they match

654

# the other, or they match

655

while 1:

655

while 1:

656

#print "ancestor gen %s %s" % (gx[0], gy[0])

656

#print "ancestor gen %s %s" % (gx[0], gy[0])

657

if gx[0] == gy[0]:

657

if gx[0] == gy[0]:

658

# find the intersection

658

# find the intersection

659

i = [ n for n in gx[1] if n in gy[1] ]

659

i = [ n for n in gx[1] if n in gy[1] ]

660

if i:

660

if i:

661

return i[0]

661

return i[0]

662

else:

662

else:

663

#print "next"

663

#print "next"

664

gy = y.next()

664

gy = y.next()

665

gx = x.next()

665

gx = x.next()

666

elif gx[0] < gy[0]:

666

elif gx[0] < gy[0]:

667

#print "next y"

667

#print "next y"

668

gy = y.next()

668

gy = y.next()

669

else:

669

else:

670

#print "next x"

670

#print "next x"

671

gx = x.next()

671

gx = x.next()

672

673

def group(self, nodelist, lookup, infocollect=None):

673

def group(self, nodelist, lookup, infocollect=None):

674

"""calculate a delta group

674

"""calculate a delta group

675

676

Given a list of changeset revs, return a set of deltas and

676

Given a list of changeset revs, return a set of deltas and

677

metadata corresponding to nodes. the first delta is

677

metadata corresponding to nodes. the first delta is

678

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

678

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

679

have this parent as it has all history before these

679

have this parent as it has all history before these

680

changesets. parent is parent[0]

680

changesets. parent is parent[0]

681

"""

681

"""

682

revs = [self.rev(n) for n in nodelist]

682

revs = [self.rev(n) for n in nodelist]

683

684

# if we don't have any revisions touched by these changesets, bail

684

# if we don't have any revisions touched by these changesets, bail

685

if not revs:

685

if not revs:

686

yield struct.pack(">l", 0)

686

yield struct.pack(">l", 0)

687

return

687

return

688

689

# add the parent of the first rev

689

# add the parent of the first rev

690

p = self.parents(self.node(revs[0]))[0]

690

p = self.parents(self.node(revs[0]))[0]

691

revs.insert(0, self.rev(p))

691

revs.insert(0, self.rev(p))

692

693

# helper to reconstruct intermediate versions

693

# helper to reconstruct intermediate versions

694

def construct(text, base, rev):

694

def construct(text, base, rev):

695

bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]

695

bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]

696

return mdiff.patches(text, bins)

696

return mdiff.patches(text, bins)

697

698

# build deltas

698

# build deltas

699

for d in xrange(0, len(revs) - 1):

699

for d in xrange(0, len(revs) - 1):

700

a, b = revs[d], revs[d + 1]

700

a, b = revs[d], revs[d + 1]

701

na = self.node(a)

701

na = self.node(a)

702

nb = self.node(b)

702

nb = self.node(b)

703

704

if infocollect is not None:

704

if infocollect is not None:

705

infocollect(nb)

705

infocollect(nb)

706

707

# do we need to construct a new delta?

707

# do we need to construct a new delta?

708

if a + 1 != b or self.base(b) == b:

708

if a + 1 != b or self.base(b) == b:

709

ta = self.revision(na)

709

ta = self.revision(na)

710

tb = self.revision(nb)

710

tb = self.revision(nb)

711

d = self.diff(ta, tb)

711

d = self.diff(ta, tb)

712

else:

712

else:

713

d = self.chunk(b)

713

d = self.chunk(b)

714

715

p = self.parents(nb)

715

p = self.parents(nb)

716

meta = nb + p[0] + p[1] + lookup(nb)

716

meta = nb + p[0] + p[1] + lookup(nb)

717

l = struct.pack(">l", len(meta) + len(d) + 4)

717

l = struct.pack(">l", len(meta) + len(d) + 4)

718

yield l

718

yield l

719

yield meta

719

yield meta

720

yield d

720

yield d

721

722

yield struct.pack(">l", 0)

722

yield struct.pack(">l", 0)

723

724

def addgroup(self, revs, linkmapper, transaction, unique=0):

724

def addgroup(self, revs, linkmapper, transaction, unique=0):

725

"""

725

"""

726

add a delta group

726

add a delta group

727

728

given a set of deltas, add them to the revision log. the

728

given a set of deltas, add them to the revision log. the

729

first delta is against its parent, which should be in our

729

first delta is against its parent, which should be in our

730

log, the rest are against the previous delta.

730

log, the rest are against the previous delta.

731

"""

731

"""

732

733

#track the base of the current delta log

733

#track the base of the current delta log

734

r = self.count()

734

r = self.count()

735

t = r - 1

735

t = r - 1

736

node = nullid

736

node = nullid

737

738

base = prev = -1

738

base = prev = -1

739

start = end = measure = 0

739

start = end = measure = 0

740

if r:

740

if r:

741

start = self.start(self.base(t))

741

start = self.start(self.base(t))

742

end = self.end(t)

742

end = self.end(t)

743

measure = self.length(self.base(t))

743

measure = self.length(self.base(t))

744

base = self.base(t)

744

base = self.base(t)

745

prev = self.tip()

745

prev = self.tip()

746

747

transaction.add(self.datafile, end)

747

transaction.add(self.datafile, end)

748

transaction.add(self.indexfile, r * struct.calcsize(indexformat))

748

transaction.add(self.indexfile, r * struct.calcsize(indexformat))

749

dfh = self.opener(self.datafile, "a")

749

dfh = self.opener(self.datafile, "a")

750

ifh = self.opener(self.indexfile, "a")

750

ifh = self.opener(self.indexfile, "a")

751

752

# loop through our set of deltas

752

# loop through our set of deltas

753

chain = None

753

chain = None

754

for chunk in revs:

754

for chunk in revs:

755

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

755

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

756

link = linkmapper(cs)

756

link = linkmapper(cs)

757

if node in self.nodemap:

757

if node in self.nodemap:

758

# this can happen if two branches make the same change

758

# this can happen if two branches make the same change

759

# if unique:

759

# if unique:

760

# raise RevlogError(_("already have %s") % hex(node[:4]))

760

# raise RevlogError(_("already have %s") % hex(node[:4]))

761

chain = node

761

chain = node

762

continue

762

continue

763

delta = chunk[80:]

763

delta = chunk[80:]

764

765

for p in (p1, p2):

765

for p in (p1, p2):

766

if not p in self.nodemap:

766

if not p in self.nodemap:

767

raise RevlogError(_("unknown parent %s") % short(p1))

767

raise RevlogError(_("unknown parent %s") % short(p1))

768

769

if not chain:

769

if not chain:

770

# retrieve the parent revision of the delta chain

770

# retrieve the parent revision of the delta chain

771

chain = p1

771

chain = p1

772

if not chain in self.nodemap:

772

if not chain in self.nodemap:

773

raise RevlogError(_("unknown base %s") % short(chain[:4]))

773

raise RevlogError(_("unknown base %s") % short(chain[:4]))

774

775

# full versions are inserted when the needed deltas become

775

# full versions are inserted when the needed deltas become

776

# comparable to the uncompressed text or when the previous

776

# comparable to the uncompressed text or when the previous

777

# version is not the one we have a delta against. We use

777

# version is not the one we have a delta against. We use

778

# the size of the previous full rev as a proxy for the

778

# the size of the previous full rev as a proxy for the

779

# current size.

779

# current size.

780

781

if chain == prev:

781

if chain == prev:

782

tempd = compress(delta)

782

tempd = compress(delta)

783

cdelta = tempd[0] + tempd[1]

783

cdelta = tempd[0] + tempd[1]

784

785

if chain != prev or (end - start + len(cdelta)) > measure * 2:

785

if chain != prev or (end - start + len(cdelta)) > measure * 2:

786

# flush our writes here so we can read it in revision

786

# flush our writes here so we can read it in revision

787

dfh.flush()

787

dfh.flush()

788

ifh.flush()

788

ifh.flush()

789

text = self.revision(chain)

789

text = self.revision(chain)

790

text = self.patches(text, [delta])

790

text = self.patches(text, [delta])

791

chk = self.addrevision(text, transaction, link, p1, p2)

791

chk = self.addrevision(text, transaction, link, p1, p2)

792

if chk != node:

792

if chk != node:

793

raise RevlogError(_("consistency error adding group"))

793

raise RevlogError(_("consistency error adding group"))

794

measure = len(text)

794

measure = len(text)

795

else:

795

else:

796

e = (end, len(cdelta), self.base(t), link, p1, p2, node)

796

e = (end, len(cdelta), self.base(t), link, p1, p2, node)

797

self.index.append(e)

797

self.index.append(e)

798

self.nodemap[node] = r

798

self.nodemap[node] = r

799

dfh.write(cdelta)

799

dfh.write(cdelta)

800

ifh.write(struct.pack(indexformat, *e))

800

ifh.write(struct.pack(indexformat, *e))

801

802

t, r, chain, prev = r, r + 1, node, node

802

t, r, chain, prev = r, r + 1, node, node

803

start = self.start(self.base(t))

803

start = self.start(self.base(t))

804

end = self.end(t)

804

end = self.end(t)

805

806

dfh.close()

806

dfh.close()

807

ifh.close()

807

ifh.close()

808

return node

808

return node

809

810

def strip(self, rev, minlink):

810

def strip(self, rev, minlink):

811

if self.count() == 0 or rev >= self.count():

811

if self.count() == 0 or rev >= self.count():

812

return

812

return

813

814

# When stripping away a revision, we need to make sure it

814

# When stripping away a revision, we need to make sure it

815

# does not actually belong to an older changeset.

815

# does not actually belong to an older changeset.

816

# The minlink parameter defines the oldest revision

816

# The minlink parameter defines the oldest revision

817

# we're allowed to strip away.

817

# we're allowed to strip away.

818

while minlink > self.index[rev][3]:

818

while minlink > self.index[rev][3]:

819

rev += 1

819

rev += 1

820

if rev >= self.count():

820

if rev >= self.count():

821

return

821

return

822

823

# first truncate the files on disk

823

# first truncate the files on disk

824

end = self.start(rev)

824

end = self.start(rev)

825

self.opener(self.datafile, "a").truncate(end)

825

self.opener(self.datafile, "a").truncate(end)

826

end = rev * struct.calcsize(indexformat)

826

end = rev * struct.calcsize(indexformat)

827

self.opener(self.indexfile, "a").truncate(end)

827

self.opener(self.indexfile, "a").truncate(end)

828

829

# then reset internal state in memory to forget those revisions

829

# then reset internal state in memory to forget those revisions

830

self.cache = None

830

self.cache = None

831

self.chunkcache = None

831

for p in self.index[rev:]:

832

for p in self.index[rev:]:

832

del self.nodemap[p[6]]

833

del self.nodemap[p[6]]

833

del self.index[rev:]

834

del self.index[rev:]

834

835

# truncating the lazyindex also truncates the lazymap.

836

# truncating the lazyindex also truncates the lazymap.

836

if isinstance(self.index, lazyindex):

837

if isinstance(self.index, lazyindex):

837

self.index.trunc(end)

838

self.index.trunc(end)

838

839

840

def checksize(self):

841

def checksize(self):

841

expected = 0

842

expected = 0

842

if self.count():

843

if self.count():

843

expected = self.end(self.count() - 1)

844

expected = self.end(self.count() - 1)

844

845

try:

846

try:

846

f = self.opener(self.datafile)

847

f = self.opener(self.datafile)

847

f.seek(0, 2)

848

f.seek(0, 2)

848

actual = f.tell()

849

actual = f.tell()

849

dd = actual - expected

850

dd = actual - expected

850

except IOError, inst:

851

except IOError, inst:

851

if inst.errno != errno.ENOENT:

852

if inst.errno != errno.ENOENT:

852

raise

853

raise

853

dd = 0

854

dd = 0

854

855

try:

856

try:

856

f = self.opener(self.indexfile)

857

f = self.opener(self.indexfile)

857

f.seek(0, 2)

858

f.seek(0, 2)

858

actual = f.tell()

859

actual = f.tell()

859

s = struct.calcsize(indexformat)

860

s = struct.calcsize(indexformat)

860

i = actual / s

861

i = actual / s

861

di = actual - (i * s)

862

di = actual - (i * s)

862

except IOError, inst:

863

except IOError, inst:

863

if inst.errno != errno.ENOENT:

864

if inst.errno != errno.ENOENT:

864

raise

865

raise

865

di = 0

866

di = 0

866

867

return (dd, di)

868

return (dd, di)

868

869

870

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """
             revlog.py - storage back-end for mercurial
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches
             Copyright 2005 Matt Mackall <mpm@selenic.com>
             This software may be used and distributed according to the terms
             of the GNU General Public License, incorporated herein by reference.
             """
             from node import *
             from i18n import gettext as _
             from demandload import demandload
             demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 l = [p1, p2]
                 l.sort()
                 s = sha.new(l[0])
                 s.update(l[1])
                 s.update(text)
                 return s.digest()
             def compress(text):
                 """ generate a possibly-compressed representation of text """
                 if not text: return ("", text)
                 if len(text) < 44:
                     if text[0] == '\0': return ("", text)
                     return ('u', text)
                 bin = zlib.compress(text)
                 if len(bin) > len(text):
                     if text[0] == '\0': return ("", text)
                     return ('u', text)
                 return ("", bin)
             def decompress(bin):
                 """ decompress the given input """
                 if not bin: return bin
                 t = bin[0]
                 if t == '\0': return bin
                 if t == 'x': return zlib.decompress(bin)
                 if t == 'u': return bin[1:]
                 raise RevlogError(_("unknown compression type %s") % t)
             indexformat = ">4l20s20s20s"
             class lazyparser(object):
                 """
                 this class avoids the need to parse the entirety of large indices
                 By default we parse and load 1000 entries at a time.
                 If no position is specified, we load the whole index, and replace
                 the lazy objects in revlog with the underlying objects for
                 efficiency in cases where we look at most of the nodes.
                 """
                 def __init__(self, data, revlog):
                     self.data = data
                     self.s = struct.calcsize(indexformat)
                     self.l = len(data)/self.s
                     self.index = [None] * self.l
                     self.map = {nullid: -1}
                     self.all = 0
                     self.revlog = revlog
                 def trunc(self, pos):
                     self.l = pos/self.s
                 def load(self, pos=None):
                     if self.all: return
                     if pos is not None:
                         block = pos / 1000
                         i = block * 1000
                         end = min(self.l, i + 1000)
                     else:
                         self.all = 1
                         i = 0
                         end = self.l
                         self.revlog.index = self.index
                         self.revlog.nodemap = self.map
                     while i < end:
                         d = self.data[i * self.s: (i + 1) * self.s]
                         e = struct.unpack(indexformat, d)
                         self.index[i] = e
                         self.map[e[6]] = i
                         i += 1
             class lazyindex(object):
                 """a lazy version of the index array"""
                 def __init__(self, parser):
                     self.p = parser
                 def __len__(self):
                     return len(self.p.index)
                 def load(self, pos):
                     if pos < 0:
                         pos += len(self.p.index)
                     self.p.load(pos)
                     return self.p.index[pos]
                 def __getitem__(self, pos):
                     return self.p.index[pos] or self.load(pos)
                 def __delitem__(self, pos):
                     del self.p.index[pos]
                 def append(self, e):
                     self.p.index.append(e)
                 def trunc(self, pos):
                     self.p.trunc(pos)
             class lazymap(object):
                 """a lazy version of the node map"""
                 def __init__(self, parser):
                     self.p = parser
                 def load(self, key):
                     if self.p.all: return
                     n = self.p.data.find(key)
                     if n < 0:
                         raise KeyError(key)
                     pos = n / self.p.s
                     self.p.load(pos)
                 def __contains__(self, key):
                     self.p.load()
                     return key in self.p.map
                 def __iter__(self):
                     yield nullid
                     for i in xrange(self.p.l):
                         try:
                             yield self.p.index[i][6]
                         except:
                             self.p.load(i)
                             yield self.p.index[i][6]
                 def __getitem__(self, key):
                     try:
                         return self.p.map[key]
                     except KeyError:
                         try:
                             self.load(key)
                             return self.p.map[key]
                         except KeyError:
                             raise KeyError("node " + hex(key))
                 def __setitem__(self, key, val):
                     self.p.map[key] = val
                 def __delitem__(self, key):
                     del self.p.map[key]
             class RevlogError(Exception): pass
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, includings its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 """
                 def __init__(self, opener, indexfile, datafile):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = datafile
                     self.opener = opener
                     self.cache = None
                     self.chunkcache = None
                     try:
                         i = self.opener(self.indexfile).read()
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         i = ""
                     if i and i[:4] != "\0\0\0\0":
                         raise RevlogError(_("incompatible revlog signature on %s") %
                                           self.indexfile)
                     if len(i) > 10000:
                         # big index, let's parse it on demand
                         parser = lazyparser(i, self)
                         self.index = lazyindex(parser)
                         self.nodemap = lazymap(parser)
                     else:
                         s = struct.calcsize(indexformat)
                         l = len(i) / s
                         self.index = [None] * l
                         m = [None] * l
                         n = 0
                         for f in xrange(0, l * s, s):
                             # offset, size, base, linkrev, p1, p2, nodeid
                             e = struct.unpack(indexformat, i[f:f + s])
                             m[n] = (e[6], n)
                             self.index[n] = e
                             n += 1
                         self.nodemap = dict(m)
                         self.nodemap[nullid] = -1
                 def tip(self): return self.node(len(self.index) - 1)
                 def count(self): return len(self.index)
                 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
                 def rev(self, node):
                     try:
                         return self.nodemap[node]
                     except KeyError:
                         raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
                 def linkrev(self, node): return self.index[self.rev(node)][3]
                 def parents(self, node):
                     if node == nullid: return (nullid, nullid)
                     return self.index[self.rev(node)][4:6]
                 def start(self, rev): return self.index[rev][0]
                 def length(self, rev): return self.index[rev][1]
                 def end(self, rev): return self.start(rev) + self.length(rev)
                 def base(self, rev): return self.index[rev][2]
                 def reachable(self, rev, stop=None):
                     reachable = {}
                     visit = [rev]
                     reachable[rev] = 1
                     if stop:
                         stopn = self.rev(stop)
                     else:
                         stopn = 0
                     while visit:
                         n = visit.pop(0)
                         if n == stop:
                             continue
                         if n == nullid:
                             continue
                         for p in self.parents(n):
                             if self.rev(p) < stopn:
                                 continue
                             if p not in reachable:
                                 reachable[p] = 1
                                 visit.append(p)
                     return reachable
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a tuple containing three elements. Elements 1 and 2 contain
                     a final list bases and heads after all the unreachable ones have been
                     pruned.  Element 0 contains a topologically sorted list of all
                     nodes that satisfy these constraints:
 . All nodes must be descended from a node in roots (the nodes on
                        roots are considered descended from themselves).
 . All nodes must also be ancestors of a node in heads (the nodes in
                        heads are considered to be their own ancestors).
                     If roots is unspecified, nullid is assumed as the only root.
                     If heads is unspecified, it is taken to be the output of the
                     heads method (i.e. a list of all nodes in the repository that
                     have no children)."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendent of nullid
                         lowestrev = -1
                     if (lowestrev == -1) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in xrange(0, self.count())],
                                 [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = self.count() - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = {}
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = heads[:]
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, 0)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendent of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors[n] = 1 # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.extend([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > -1:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [n for n in roots if n in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(n) for n in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = -1
                             roots = [nullid]
                     # Transform our roots list into a 'set' (i.e. a dictionary where the
                     # values don't matter.
                     descendents = dict.fromkeys(roots, 1)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendents.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descedents, empty parents will look like
                     # they're descendents.
                     for r in xrange(max(lowestrev, 0), highestrev + 1):
                         n = self.node(r)
                         isdescendent = False
                         if lowestrev == -1:  # Everybody is a descendent of nullid
                             isdescendent = True
                         elif n in descendents:
                             # n is already a descendent
                             isdescendent = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendents before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendents, it's not a root.
                                 if (p[0] in descendents) or (p[1] in descendents):
                                     roots.pop(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendent if either of its parents are
                             # descendents.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendents) or (p[1] in descendents):
                                 descendents[n] = 1
                                 isdescendent = True
                         if isdescendent and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendents and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = 1
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = 1
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [n for n in heads.iterkeys() if heads[n] != 0]
                     roots = roots.keys()
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def heads(self, start=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     """
                     if start is None:
                         start = nullid
                     reachable = {start: 1}
                     heads = {start: 1}
                     startrev = self.rev(start)
                     for r in xrange(startrev + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn in reachable:
                                 reachable[n] = 1
                                 heads[n] = 1
                             if pn in heads:
                                 del heads[pn]
                     return heads.keys()
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in range(p + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn == node:
                                 c.append(n)
                                 continue
                             elif pn == nullid:
                                 continue
                     return c
                 def lookup(self, id):
                     """locate a node based on revision number or subset of hex nodeid"""
                     try:
                         rev = int(id)
                         if str(rev) != id: raise ValueError
                         if rev < 0: rev = self.count() + rev
                         if rev < 0 or rev >= self.count(): raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         c = []
                         for n in self.nodemap:
                             if hex(n).startswith(id):
                                 c.append(n)
                         if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
                         if len(c) < 1: raise RevlogError(_("No match found"))
                         return c[0]
                     return None
                 def diff(self, a, b):
                     """return a delta between two revisions"""
                     return mdiff.textdiff(a, b)
                 def patches(self, t, pl):
                     """apply a list of patches to a string"""
                     return mdiff.patches(t, pl)
                 def chunk(self, rev):
                     start, length = self.start(rev), self.length(rev)
                     end = start + length
                     def loadcache():
                         cache_length = max(4096 * 1024, length) # 4Mo
                         df = self.opener(self.datafile)
                         df.seek(start)
                         self.chunkcache = (start, df.read(cache_length))
                     if not self.chunkcache:
                         loadcache()
                     cache_start = self.chunkcache[0]
                     cache_end = cache_start + len(self.chunkcache[1])
                     if start >= cache_start and end <= cache_end:
                         # it is cached
                         offset = start - cache_start
                     else:
                         loadcache()
                         offset = 0
                     #def checkchunk():
                     #    df = self.opener(self.datafile)
                     #    df.seek(start)
                     #    return df.read(length)
                     #assert s == checkchunk()
                     return decompress(self.chunkcache[1][offset:offset + length])
                 def delta(self, node):
                     """return or calculate a delta between a node and its predecessor"""
                     r = self.rev(node)
                     b = self.base(r)
                     if r == b:
                         return self.diff(self.revision(self.node(r - 1)),
                                          self.revision(node))
                     else:
                         return self.chunk(r)
                 def revision(self, node):
                     """return an uncompressed revision of a given"""
                     if node == nullid: return ""
                     if self.cache and self.cache[0] == node: return self.cache[2]
                     # look up what we need to read
                     text = None
                     rev = self.rev(node)
                     base = self.base(rev)
                     # do we have useful data cached?
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         base = self.cache[1]
                         text = self.cache[2]
                     else:
                         text = self.chunk(base)
                     bins = []
                     for r in xrange(base + 1, rev + 1):
                         bins.append(self.chunk(r))
                     text = mdiff.patches(text, bins)
                     p1, p2 = self.parents(node)
                     if node != hash(text, p1, p2):
                         raise RevlogError(_("integrity check failed on %s:%d")
                                       % (self.datafile, rev))
                     self.cache = (node, rev, text)
                     return text
                 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     d - an optional precomputed delta
                     """
                     if text is None: text = ""
                     if p1 is None: p1 = self.tip()
                     if p2 is None: p2 = nullid
                     node = hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     n = self.count()
                     t = n - 1
                     if n:
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                         if not d:
                             prev = self.revision(self.tip())
                             d = self.diff(prev, str(text))
                         data = compress(d)
                         l = len(data[1]) + len(data[0])
                         dist = end - start + l
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if not n or dist > len(text) * 2:
                         data = compress(text)
                         l = len(data[1]) + len(data[0])
                         base = n
                     else:
                         base = self.base(t)
                     offset = 0
                     if t >= 0:
                         offset = self.end(t)
                     e = (offset, l, base, link, p1, p2, node)
                     self.index.append(e)
                     self.nodemap[node] = n
                     entry = struct.pack(indexformat, *e)
                     transaction.add(self.datafile, e[0])
                     f = self.opener(self.datafile, "a")
                     if data[0]:
                         f.write(data[0])
                     f.write(data[1])
                     transaction.add(self.indexfile, n * len(entry))
                     self.opener(self.indexfile, "a").write(entry)
                     self.cache = (node, n, text)
                     return node
                 def ancestor(self, a, b):
                     """calculate the least common ancestor of nodes a and b"""
                     # calculate the distance of every node from root
                     dist = {nullid: 0}
                     for i in xrange(self.count()):
                         n = self.node(i)
                         p1, p2 = self.parents(n)
                         dist[n] = max(dist[p1], dist[p2]) + 1
                     # traverse ancestors in order of decreasing distance from root
                     def ancestors(node):
                         # we store negative distances because heap returns smallest member
                         h = [(-dist[node], node)]
                         seen = {}
                         earliest = self.count()
                         while h:
                             d, n = heapq.heappop(h)
                             if n not in seen:
                                 seen[n] = 1
                                 r = self.rev(n)
                                 yield (-d, n)
                                 for p in self.parents(n):
                                     heapq.heappush(h, (-dist[p], p))
                     def generations(node):
                         sg, s = None, {}
                         for g,n in ancestors(node):
                             if g != sg:
                                 if sg:
                                     yield sg, s
                                 sg, s = g, {n:1}
                             else:
                                 s[n] = 1
                         yield sg, s
                     x = generations(a)
                     y = generations(b)
                     gx = x.next()
                     gy = y.next()
                     # increment each ancestor list until it is closer to root than
                     # the other, or they match
                     while 1:
                         #print "ancestor gen %s %s" % (gx[0], gy[0])
                         if gx[0] == gy[0]:
                             # find the intersection
                             i = [ n for n in gx[1] if n in gy[1] ]
                             if i:
                                 return i[0]
                             else:
                                 #print "next"
                                 gy = y.next()
                                 gx = x.next()
                         elif gx[0] < gy[0]:
                             #print "next y"
                             gy = y.next()
                         else:
                             #print "next x"
                             gx = x.next()
                 def group(self, nodelist, lookup, infocollect=None):
                     """calculate a delta group
                     Given a list of changeset revs, return a set of deltas and
                     metadata corresponding to nodes. the first delta is
                     parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
                     have this parent as it has all history before these
                     changesets. parent is parent[0]
                     """
                     revs = [self.rev(n) for n in nodelist]
                     # if we don't have any revisions touched by these changesets, bail
                     if not revs:
                         yield struct.pack(">l", 0)
                         return
                     # add the parent of the first rev
                     p = self.parents(self.node(revs[0]))[0]
                     revs.insert(0, self.rev(p))
                     # helper to reconstruct intermediate versions
                     def construct(text, base, rev):
                         bins = [self.chunk(r) for r in xrange(base + 1, rev + 1)]
                         return mdiff.patches(text, bins)
                     # build deltas
                     for d in xrange(0, len(revs) - 1):
                         a, b = revs[d], revs[d + 1]
                         na = self.node(a)
                         nb = self.node(b)
                         if infocollect is not None:
                             infocollect(nb)
                         # do we need to construct a new delta?
                         if a + 1 != b or self.base(b) == b:
                             ta = self.revision(na)
                             tb = self.revision(nb)
                             d = self.diff(ta, tb)
                         else:
                             d = self.chunk(b)
                         p = self.parents(nb)
                         meta = nb + p[0] + p[1] + lookup(nb)
                         l = struct.pack(">l", len(meta) + len(d) + 4)
                         yield l
                         yield meta
                         yield d
                     yield struct.pack(">l", 0)
                 def addgroup(self, revs, linkmapper, transaction, unique=0):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     """
                     #track the base of the current delta log
                     r = self.count()
                     t = r - 1
                     node = nullid
                     base = prev = -1
                     start = end = measure = 0
                     if r:
                         start = self.start(self.base(t))
                         end = self.end(t)
                         measure = self.length(self.base(t))
                         base = self.base(t)
                         prev = self.tip()
                     transaction.add(self.datafile, end)
                     transaction.add(self.indexfile, r * struct.calcsize(indexformat))
                     dfh = self.opener(self.datafile, "a")
                     ifh = self.opener(self.indexfile, "a")
                     # loop through our set of deltas
                     chain = None
                     for chunk in revs:
                         node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                         link = linkmapper(cs)
                         if node in self.nodemap:
                             # this can happen if two branches make the same change
                             # if unique:
                             #    raise RevlogError(_("already have %s") % hex(node[:4]))
                             chain = node
                             continue
                         delta = chunk[80:]
                         for p in (p1, p2):
                             if not p in self.nodemap:
                                 raise RevlogError(_("unknown parent %s") % short(p1))
                         if not chain:
                             # retrieve the parent revision of the delta chain
                             chain = p1
                             if not chain in self.nodemap:
                                 raise RevlogError(_("unknown base %s") % short(chain[:4]))
                         # full versions are inserted when the needed deltas become
                         # comparable to the uncompressed text or when the previous
                         # version is not the one we have a delta against. We use
                         # the size of the previous full rev as a proxy for the
                         # current size.
                         if chain == prev:
                             tempd = compress(delta)
                             cdelta = tempd[0] + tempd[1]
                         if chain != prev or (end - start + len(cdelta)) > measure * 2:
                             # flush our writes here so we can read it in revision
                             dfh.flush()
                             ifh.flush()
                             text = self.revision(chain)
                             text = self.patches(text, [delta])
                             chk = self.addrevision(text, transaction, link, p1, p2)
                             if chk != node:
                                 raise RevlogError(_("consistency error adding group"))
                             measure = len(text)
                         else:
                             e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                             self.index.append(e)
                             self.nodemap[node] = r
                             dfh.write(cdelta)
                             ifh.write(struct.pack(indexformat, *e))
                         t, r, chain, prev = r, r + 1, node, node
                         start = self.start(self.base(t))
                         end = self.end(t)
                     dfh.close()
                     ifh.close()
                     return node
                 def strip(self, rev, minlink):
                     if self.count() == 0 or rev >= self.count():
                         return
                     # When stripping away a revision, we need to make sure it
                     # does not actually belong to an older changeset.
                     # The minlink parameter defines the oldest revision
                     # we're allowed to strip away.
                     while minlink > self.index[rev][3]:
                         rev += 1
                         if rev >= self.count():
                             return
                     # first truncate the files on disk
                     end = self.start(rev)
                     self.opener(self.datafile, "a").truncate(end)
                     end = rev * struct.calcsize(indexformat)
                     self.opener(self.indexfile, "a").truncate(end)
                     # then reset internal state in memory to forget those revisions
                     self.cache = None
+                    self.chunkcache = None
                     for p in self.index[rev:]:
                         del self.nodemap[p[6]]
                     del self.index[rev:]
                     # truncating the lazyindex also truncates the lazymap.
                     if isinstance(self.index, lazyindex):
                         self.index.trunc(end)
                 def checksize(self):
                     expected = 0
                     if self.count():
                         expected = self.end(self.count() - 1)
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         dd = actual - expected
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         s = struct.calcsize(indexformat)
                         i = actual / s
                         di = actual - (i * s)
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)