changeset:   r1535:7ae0ce7a
branch:      default
user:        mason@suse.com
summary:     Add revlog.strip to truncate away revisions...
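This changeset adds a strip method to revlog, along with the small trunc and __delitem__ helpers that lazyindex and lazymap need so the in-memory state can be cut back to match the truncated files. Before the diff, a minimal usage sketch; the names rl and striprev are illustrative stand-ins, not part of the change:

# Assumed setup: `rl` is an existing revlog instance (e.g. a filelog).
striprev = rl.count() - 1          # first revision to discard: here, the tip
minlink = rl.index[striprev][3]    # index field 3 holds the linkrev
rl.strip(striprev, minlink)        # truncates the index and data files on disk
print rl.count()                   # == striprev; the tip revision is gone

Note that strip walks rev forward past any entries whose linkrev is older than minlink, so it never removes data still referenced by an older changeset.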
@@ -1,851 +1,890 @@
 """
 revlog.py - storage back-end for mercurial

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import gettext as _
 from demandload import demandload
 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text: return ("", text)
     if len(text) < 44:
         if text[0] == '\0': return ("", text)
         return ('u', text)
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0': return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError(_("unknown compression type %s") % t)

 indexformat = ">4l20s20s20s"

 class lazyparser:
     """
     this class avoids the need to parse the entirety of large indices

     By default we parse and load 1000 entries at a time.

     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog

+    def trunc(self, pos):
+        self.l = pos/self.s
+
     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map

         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1

 class lazyindex:
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
+    def __delitem__(self, pos):
+        del self.p.index[pos]
     def append(self, e):
         self.p.index.append(e)
+    def trunc(self, pos):
+        self.p.trunc(pos)

 class lazymap:
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0:
             raise KeyError(key)
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
+    def __delitem__(self, key):
+        del self.p.map[key]

 class RevlogError(Exception): pass

 class revlog:
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None

         try:
             i = self.opener(self.indexfile).read()
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""

         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l

             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1

             self.nodemap = dict(m)
             self.nodemap[nullid] = -1

     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def linkrev(self, node): return self.index[self.rev(node)][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.rev(node)][4:6]

     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]

     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list of bases and heads after all the unreachable ones have been
         pruned. Element 0 contains a topologically sorted list of all

         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
         roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
         heads are considered to be their own ancestors).

         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = -1
         if (lowestrev == -1) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in xrange(0, self.count())],
                     [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = self.count() - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = {}
             # Start at the top and keep marking parents until we're done.
             nodestotag = heads[:]
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors[n] = 1 # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.extend([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.

             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > -1:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.

                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots? Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = -1
                 roots = [nullid]
         # Transform our roots list into a 'set' (i.e. a dictionary where the
         # values don't matter).
         descendents = dict.fromkeys(roots, 1)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descendents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == -1: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked as descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.pop(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents. (We seeded the dependents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents[n] = 1
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = roots.keys()
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)

     def heads(self, stop=None):
         """return the list of all nodes that have no children"""
         p = {}
         h = []
         stoprev = 0
         if stop and stop in self.nodemap:
             stoprev = self.rev(stop)

         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             if n == stop:
                 break
             if r < stoprev:
                 break
             for pn in self.parents(n):
                 p[pn] = 1
         return h

     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c

     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
             if len(c) < 1: raise RevlogError(_("No match found"))
             return c[0]

         return None

     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)

     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)

     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)

     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]

         # look up what we need to read
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)

         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0

         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)

         if text is None:
             last = self.length(base)
             text = decompress(data[:last])

         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s

         text = mdiff.patches(text, bins)

         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))

         self.cache = (node, rev, text)
         return text

     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log

         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid

         node = hash(text, p1, p2)

         if node in self.nodemap:
             return node

         n = self.count()
         t = n - 1

         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
                 d = self.diff(prev, str(text))
             data = compress(d)
             l = len(data[1]) + len(data[0])
             dist = end - start + l

         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
             l = len(data[1]) + len(data[0])
             base = n
         else:
             base = self.base(t)

         offset = 0
         if t >= 0:
             offset = self.end(t)

         e = (offset, l, base, link, p1, p2, node)

         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)

         transaction.add(self.datafile, e[0])
         f = self.opener(self.datafile, "a")
         if data[0]:
             f.write(data[0])
         f.write(data[1])
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)

         self.cache = (node, n, text)
         return node

     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1

         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     r = self.rev(n)
                     yield (-d, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))

         def generations(node):
             sg, s = None, {}
             for g,n in ancestors(node):
                 if g != sg:
                     if sg:
                         yield sg, s
                     sg, s = g, {n:1}
                 else:
                     s[n] = 1
             yield sg, s

         x = generations(a)
         y = generations(b)
         gx = x.next()
         gy = y.next()

         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             #print "ancestor gen %s %s" % (gx[0], gy[0])
             if gx[0] == gy[0]:
                 # find the intersection
                 i = [ n for n in gx[1] if n in gy[1] ]
                 if i:
                     return i[0]
                 else:
                     #print "next"
                     gy = y.next()
                     gx = x.next()
             elif gx[0] < gy[0]:
                 #print "next y"
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()

     def group(self, nodelist, lookup, infocollect = None):
         """calculate a delta group

         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. the first delta is
         parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. parent is parent[0]
         """
         revs = [self.rev(n) for n in nodelist]
         needed = dict.fromkeys(revs, 1)

         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return

         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))

         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1

         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))

         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])

             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l

         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)

         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)

             if infocollect is not None:
                 infocollect(n)

             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""

                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                     tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]

             p = self.parents(n)
             meta = n + p[0] + p[1] + lookup(n)
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d

         yield struct.pack(">l", 0)

     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group

         given a set of deltas, add them to the revision log. the
         first delta is against its parent, which should be in our
         log, the rest are against the previous delta.
         """

         #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid

         base = prev = -1
         start = end = measure = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()

         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")

         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 # if unique:
                 #    raise RevlogError(_("already have %s") % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]

             for p in (p1, p2):
                 if not p in self.nodemap:
                     raise RevlogError(_("unknown parent %s") % short(p1))

             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError(_("unknown base %s") % short(chain[:4]))

             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.

             if chain == prev:
                 tempd = compress(delta)
                 cdelta = tempd[0] + tempd[1]

             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))

             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)

         dfh.close()
         ifh.close()
         return node

+    def strip(self, rev, minlink):
+        if self.count() == 0 or rev >= self.count():
+            return
+
+        # When stripping away a revision, we need to make sure it
+        # does not actually belong to an older changeset.
+        # The minlink parameter defines the oldest revision
+        # we're allowed to strip away.
+        while minlink > self.index[rev][3]:
+            rev += 1
+            if rev >= self.count():
+                return
+
+        # first truncate the files on disk
+        end = self.start(rev)
+        self.opener(self.datafile, "a").truncate(end)
+        end = rev * struct.calcsize(indexformat)
+        self.opener(self.indexfile, "a").truncate(end)
+
+        # then reset internal state in memory to forget those revisions
+        self.cache = None
+        for p in self.index[rev:]:
+            del self.nodemap[p[6]]
+        del self.index[rev:]
+
+        # truncating the lazyindex also truncates the lazymap.
+        if isinstance(self.index, lazyindex):
+            self.index.trunc(end)
+
+
     def checksize(self):
         expected = 0
         if self.count():
             expected = self.end(self.count() - 1)
         try:
             f = self.opener(self.datafile)
             f.seek(0, 2)
             actual = f.tell()
             return expected - actual
         except IOError, inst:
             if inst.errno == errno.ENOENT:
                 return 0
             raise

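The truncation offsets in strip fall straight out of the fixed-size index format defined at the top of the file. A standalone sanity check of that arithmetic (the revision number 5 is an arbitrary example, not taken from the changeset):

import struct

indexformat = ">4l20s20s20s"       # same format string as revlog.py
s = struct.calcsize(indexformat)   # 4 * 4-byte longs + 3 * 20-byte hashes = 76

rev = 5                            # strip everything from revision 5 onward
print rev * s                      # 380: new length of the index file
# The data file is truncated to self.start(rev), the byte offset where
# revision 5's chunk begins, since chunks are laid out append-only.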
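As the hash() docstring above notes, a node id combines content and history; because the parents are sorted before hashing, swapping p1 and p2 yields the same id. A tiny standalone check mirroring the function at the top of the file (toy 20-byte parent values, assumed for illustration):

import sha   # the Python 2 module revlog.py uses

def hash(text, p1, p2):
    # same algorithm as revlog.hash above
    l = [p1, p2]
    l.sort()
    s = sha.new(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

p1, p2 = "a" * 20, "b" * 20
assert hash("data", p1, p2) == hash("data", p2, p1)   # parent order ignored
assert hash("data", p1, p2) != hash("other", p1, p2)  # content still matters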