upstream/mercurial-mirror Commit - r1494:249ca10d

1

"""

1

"""

2

revlog.py - storage back-end for mercurial

2

revlog.py - storage back-end for mercurial

3

4

This provides efficient delta storage with O(1) retrieve and append

4

This provides efficient delta storage with O(1) retrieve and append

5

and O(changes) merge between branches

5

and O(changes) merge between branches

6

7

8

9

This software may be used and distributed according to the terms

9

This software may be used and distributed according to the terms

10

of the GNU General Public License, incorporated herein by reference.

10

of the GNU General Public License, incorporated herein by reference.

11

"""

11

"""

12

13

from node import *

13

from node import *

14

from i18n import gettext as _

14

from i18n import gettext as _

15

from demandload import demandload

15

from demandload import demandload

16

demandload(globals(), "binascii errno heapq mdiff sha struct zlib")

16

demandload(globals(), "binascii errno heapq mdiff sha struct zlib")

17

18

def hash(text, p1, p2):

18

def hash(text, p1, p2):

19

"""generate a hash from the given text and its parent hashes

19

"""generate a hash from the given text and its parent hashes

20

21

This hash combines both the current file contents and its history

21

This hash combines both the current file contents and its history

22

in a manner that makes it easy to distinguish nodes with the same

22

in a manner that makes it easy to distinguish nodes with the same

23

content in the revision graph.

23

content in the revision graph.

24

"""

24

"""

25

l = [p1, p2]

25

l = [p1, p2]

26

l.sort()

26

l.sort()

27

s = sha.new(l[0])

27

s = sha.new(l[0])

28

s.update(l[1])

28

s.update(l[1])

29

s.update(text)

29

s.update(text)

30

return s.digest()

30

return s.digest()

31

32

def compress(text):

32

def compress(text):

33

""" generate a possibly-compressed representation of text """

33

""" generate a possibly-compressed representation of text """

34

if not text: return text

34

if not text: return text

35

if len(text) < 44:

35

if len(text) < 44:

36

if text[0] == '\0': return text

36

if text[0] == '\0': return text

37

return 'u' + text

37

return 'u' + text

38

bin = zlib.compress(text)

38

bin = zlib.compress(text)

39

if len(bin) > len(text):

39

if len(bin) > len(text):

40

if text[0] == '\0': return text

40

if text[0] == '\0': return text

41

return 'u' + text

41

return 'u' + text

42

return bin

42

return bin

43

44

def decompress(bin):

44

def decompress(bin):

45

""" decompress the given input """

45

""" decompress the given input """

46

if not bin: return bin

46

if not bin: return bin

47

t = bin[0]

47

t = bin[0]

48

if t == '\0': return bin

48

if t == '\0': return bin

49

if t == 'x': return zlib.decompress(bin)

49

if t == 'x': return zlib.decompress(bin)

50

if t == 'u': return bin[1:]

50

if t == 'u': return bin[1:]

51

raise RevlogError(_("unknown compression type %s") % t)

51

raise RevlogError(_("unknown compression type %s") % t)

52

53

indexformat = ">4l20s20s20s"

53

indexformat = ">4l20s20s20s"

54

55

class lazyparser:

55

class lazyparser:

56

"""

56

"""

57

this class avoids the need to parse the entirety of large indices

57

this class avoids the need to parse the entirety of large indices

58

59

By default we parse and load 1000 entries at a time.

59

By default we parse and load 1000 entries at a time.

60

61

If no position is specified, we load the whole index, and replace

61

If no position is specified, we load the whole index, and replace

62

the lazy objects in revlog with the underlying objects for

62

the lazy objects in revlog with the underlying objects for

63

efficiency in cases where we look at most of the nodes.

63

efficiency in cases where we look at most of the nodes.

64

"""

64

"""

65

def __init__(self, data, revlog):

65

def __init__(self, data, revlog):

66

self.data = data

66

self.data = data

67

self.s = struct.calcsize(indexformat)

67

self.s = struct.calcsize(indexformat)

68

self.l = len(data)/self.s

68

self.l = len(data)/self.s

69

self.index = [None] * self.l

69

self.index = [None] * self.l

70

self.map = {nullid: -1}

70

self.map = {nullid: -1}

71

self.all = 0

71

self.all = 0

72

self.revlog = revlog

72

self.revlog = revlog

73

74

def load(self, pos=None):

74

def load(self, pos=None):

75

if self.all: return

75

if self.all: return

76

if pos is not None:

76

if pos is not None:

77

block = pos / 1000

77

block = pos / 1000

78

i = block * 1000

78

i = block * 1000

79

end = min(self.l, i + 1000)

79

end = min(self.l, i + 1000)

80

else:

80

else:

81

self.all = 1

81

self.all = 1

82

i = 0

82

i = 0

83

end = self.l

83

end = self.l

84

self.revlog.index = self.index

84

self.revlog.index = self.index

85

self.revlog.nodemap = self.map

85

self.revlog.nodemap = self.map

86

87

while i < end:

87

while i < end:

88

d = self.data[i * self.s: (i + 1) * self.s]

88

d = self.data[i * self.s: (i + 1) * self.s]

89

e = struct.unpack(indexformat, d)

89

e = struct.unpack(indexformat, d)

90

self.index[i] = e

90

self.index[i] = e

91

self.map[e[6]] = i

91

self.map[e[6]] = i

92

i += 1

92

i += 1

93

94

class lazyindex:

94

class lazyindex:

95

"""a lazy version of the index array"""

95

"""a lazy version of the index array"""

96

def __init__(self, parser):

96

def __init__(self, parser):

97

self.p = parser

97

self.p = parser

98

def __len__(self):

98

def __len__(self):

99

return len(self.p.index)

99

return len(self.p.index)

100

def load(self, pos):

100

def load(self, pos):

101

if pos < 0:

101

if pos < 0:

102

pos += len(self.p.index)

102

pos += len(self.p.index)

103

self.p.load(pos)

103

self.p.load(pos)

104

return self.p.index[pos]

104

return self.p.index[pos]

105

def __getitem__(self, pos):

105

def __getitem__(self, pos):

106

return self.p.index[pos] or self.load(pos)

106

return self.p.index[pos] or self.load(pos)

107

def append(self, e):

107

def append(self, e):

108

self.p.index.append(e)

108

self.p.index.append(e)

109

110

class lazymap:

110

class lazymap:

111

"""a lazy version of the node map"""

111

"""a lazy version of the node map"""

112

def __init__(self, parser):

112

def __init__(self, parser):

113

self.p = parser

113

self.p = parser

114

def load(self, key):

114

def load(self, key):

115

if self.p.all: return

115

if self.p.all: return

116

n = self.p.data.find(key)

116

n = self.p.data.find(key)

117

if n < 0:

117

if n < 0:

118

raise KeyError(key)

118

raise KeyError(key)

119

pos = n / self.p.s

119

pos = n / self.p.s

120

self.p.load(pos)

120

self.p.load(pos)

121

def __contains__(self, key):

121

def __contains__(self, key):

122

self.p.load()

122

self.p.load()

123

return key in self.p.map

123

return key in self.p.map

124

def __iter__(self):

124

def __iter__(self):

125

yield nullid

125

yield nullid

126

for i in xrange(self.p.l):

126

for i in xrange(self.p.l):

127

try:

127

try:

128

yield self.p.index[i][6]

128

yield self.p.index[i][6]

129

except:

129

except:

130

self.p.load(i)

130

self.p.load(i)

131

yield self.p.index[i][6]

131

yield self.p.index[i][6]

132

def __getitem__(self, key):

132

def __getitem__(self, key):

133

try:

133

try:

134

return self.p.map[key]

134

return self.p.map[key]

135

except KeyError:

135

except KeyError:

136

try:

136

try:

137

self.load(key)

137

self.load(key)

138

return self.p.map[key]

138

return self.p.map[key]

139

except KeyError:

139

except KeyError:

140

raise KeyError("node " + hex(key))

140

raise KeyError("node " + hex(key))

141

def __setitem__(self, key, val):

141

def __setitem__(self, key, val):

142

self.p.map[key] = val

142

self.p.map[key] = val

143

144

class RevlogError(Exception): pass

144

class RevlogError(Exception): pass

145

146

class revlog:

146

class revlog:

147

"""

147

"""

148

the underlying revision storage object

148

the underlying revision storage object

149

150

A revlog consists of two parts, an index and the revision data.

150

A revlog consists of two parts, an index and the revision data.

151

152

The index is a file with a fixed record size containing

152

The index is a file with a fixed record size containing

153

information on each revision, includings its nodeid (hash), the

153

information on each revision, includings its nodeid (hash), the

154

nodeids of its parents, the position and offset of its data within

154

nodeids of its parents, the position and offset of its data within

155

the data file, and the revision it's based on. Finally, each entry

155

the data file, and the revision it's based on. Finally, each entry

156

contains a linkrev entry that can serve as a pointer to external

156

contains a linkrev entry that can serve as a pointer to external

157

data.

157

data.

158

159

The revision data itself is a linear collection of data chunks.

159

The revision data itself is a linear collection of data chunks.

160

Each chunk represents a revision and is usually represented as a

160

Each chunk represents a revision and is usually represented as a

161

delta against the previous chunk. To bound lookup time, runs of

161

delta against the previous chunk. To bound lookup time, runs of

162

deltas are limited to about 2 times the length of the original

162

deltas are limited to about 2 times the length of the original

163

version data. This makes retrieval of a version proportional to

163

version data. This makes retrieval of a version proportional to

164

its size, or O(1) relative to the number of revisions.

164

its size, or O(1) relative to the number of revisions.

165

166

Both pieces of the revlog are written to in an append-only

166

Both pieces of the revlog are written to in an append-only

167

fashion, which means we never need to rewrite a file to insert or

167

fashion, which means we never need to rewrite a file to insert or

168

remove data, and can use some simple techniques to avoid the need

168

remove data, and can use some simple techniques to avoid the need

169

for locking while reading.

169

for locking while reading.

170

"""

170

"""

171

def __init__(self, opener, indexfile, datafile):

171

def __init__(self, opener, indexfile, datafile):

172

"""

172

"""

173

create a revlog object

173

create a revlog object

174

175

opener is a function that abstracts the file opening operation

175

opener is a function that abstracts the file opening operation

176

and can be used to implement COW semantics or the like.

176

and can be used to implement COW semantics or the like.

177

"""

177

"""

178

self.indexfile = indexfile

178

self.indexfile = indexfile

179

self.datafile = datafile

179

self.datafile = datafile

180

self.opener = opener

180

self.opener = opener

181

self.cache = None

181

self.cache = None

182

183

try:

183

try:

184

i = self.opener(self.indexfile).read()

184

i = self.opener(self.indexfile).read()

185

except IOError, inst:

185

except IOError, inst:

186

if inst.errno != errno.ENOENT:

186

if inst.errno != errno.ENOENT:

187

raise

187

raise

188

i = ""

188

i = ""

189

190

if len(i) > 10000:

190

if len(i) > 10000:

191

# big index, let's parse it on demand

191

# big index, let's parse it on demand

192

parser = lazyparser(i, self)

192

parser = lazyparser(i, self)

193

self.index = lazyindex(parser)

193

self.index = lazyindex(parser)

194

self.nodemap = lazymap(parser)

194

self.nodemap = lazymap(parser)

195

else:

195

else:

196

s = struct.calcsize(indexformat)

196

s = struct.calcsize(indexformat)

197

l = len(i) / s

197

l = len(i) / s

198

self.index = [None] * l

198

self.index = [None] * l

199

m = [None] * l

199

m = [None] * l

200

201

n = 0

201

n = 0

202

for f in xrange(0, len(i), s):

202

for f in xrange(0, len(i), s):

203

# offset, size, base, linkrev, p1, p2, nodeid

203

# offset, size, base, linkrev, p1, p2, nodeid

204

e = struct.unpack(indexformat, i[f:f + s])

204

e = struct.unpack(indexformat, i[f:f + s])

205

m[n] = (e[6], n)

205

m[n] = (e[6], n)

206

self.index[n] = e

206

self.index[n] = e

207

n += 1

207

n += 1

208

209

self.nodemap = dict(m)

209

self.nodemap = dict(m)

210

self.nodemap[nullid] = -1

210

self.nodemap[nullid] = -1

211

212

def tip(self): return self.node(len(self.index) - 1)

212

def tip(self): return self.node(len(self.index) - 1)

213

def count(self): return len(self.index)

213

def count(self): return len(self.index)

214

def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]

214

def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]

215

def rev(self, node):

215

def rev(self, node):

216

try:

216

try:

217

return self.nodemap[node]

217

return self.nodemap[node]

218

except KeyError:

218

except KeyError:

219

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

219

raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))

220

def linkrev(self, node): return self.index[self.rev(node)][3]

220

def linkrev(self, node): return self.index[self.rev(node)][3]

221

def parents(self, node):

221

def parents(self, node):

222

if node == nullid: return (nullid, nullid)

222

if node == nullid: return (nullid, nullid)

223

return self.index[self.rev(node)][4:6]

223

return self.index[self.rev(node)][4:6]

224

225

def start(self, rev): return self.index[rev][0]

225

def start(self, rev): return self.index[rev][0]

226

def length(self, rev): return self.index[rev][1]

226

def length(self, rev): return self.index[rev][1]

227

def end(self, rev): return self.start(rev) + self.length(rev)

227

def end(self, rev): return self.start(rev) + self.length(rev)

228

def base(self, rev): return self.index[rev][2]

228

def base(self, rev): return self.index[rev][2]

229

230

def reachable(self, rev, stop=None):

230

def reachable(self, rev, stop=None):

231

reachable = {}

231

reachable = {}

232

visit = [rev]

232

visit = [rev]

233

reachable[rev] = 1

233

reachable[rev] = 1

234

if stop:

234

if stop:

235

stopn = self.rev(stop)

235

stopn = self.rev(stop)

236

else:

236

else:

237

stopn = 0

237

stopn = 0

238

while visit:

238

while visit:

239

n = visit.pop(0)

239

n = visit.pop(0)

240

if n == stop:

240

if n == stop:

241

continue

241

continue

242

if n == nullid:

242

if n == nullid:

243

continue

243

continue

244

for p in self.parents(n):

244

for p in self.parents(n):

245

if self.rev(p) < stopn:

245

if self.rev(p) < stopn:

246

continue

246

continue

247

if p not in reachable:

247

if p not in reachable:

248

reachable[p] = 1

248

reachable[p] = 1

249

visit.append(p)

249

visit.append(p)

250

return reachable

250

return reachable

251

252

def nodesbetween(self, roots=None, heads=None):

252

def nodesbetween(self, roots=None, heads=None):

253

"""Return a tuple containing three elements. Elements 1 and 2 contain

253

"""Return a tuple containing three elements. Elements 1 and 2 contain

254

a final list bases and heads after all the unreachable ones have been

254

a final list bases and heads after all the unreachable ones have been

255

pruned. Element 0 contains a topologically sorted list of all

255

pruned. Element 0 contains a topologically sorted list of all

256

257

nodes that satisfy these constraints:

257

nodes that satisfy these constraints:

258

1. All nodes must be descended from a node in roots (the nodes on

258

1. All nodes must be descended from a node in roots (the nodes on

259

roots are considered descended from themselves).

259

roots are considered descended from themselves).

260

2. All nodes must also be ancestors of a node in heads (the nodes in

260

2. All nodes must also be ancestors of a node in heads (the nodes in

261

heads are considered to be their own ancestors).

261

heads are considered to be their own ancestors).

262

263

If roots is unspecified, nullid is assumed as the only root.

263

If roots is unspecified, nullid is assumed as the only root.

264

If heads is unspecified, it is taken to be the output of the

264

If heads is unspecified, it is taken to be the output of the

265

heads method (i.e. a list of all nodes in the repository that

265

heads method (i.e. a list of all nodes in the repository that

266

have no children)."""

266

have no children)."""

267

nonodes = ([], [], [])

267

nonodes = ([], [], [])

268

if roots is not None:

268

if roots is not None:

269

roots = list(roots)

269

roots = list(roots)

270

if not roots:

270

if not roots:

271

return nonodes

271

return nonodes

272

lowestrev = min([self.rev(n) for n in roots])

272

lowestrev = min([self.rev(n) for n in roots])

273

else:

273

else:

274

roots = [nullid] # Everybody's a descendent of nullid

274

roots = [nullid] # Everybody's a descendent of nullid

275

lowestrev = -1

275

lowestrev = -1

276

if (lowestrev == -1) and (heads is None):

276

if (lowestrev == -1) and (heads is None):

277

# We want _all_ the nodes!

277

# We want _all_ the nodes!

278

return ([self.node(r) for r in xrange(0, self.count())],

278

return ([self.node(r) for r in xrange(0, self.count())],

279

[nullid], list(self.heads()))

279

[nullid], list(self.heads()))

280

if heads is None:

280

if heads is None:

281

# All nodes are ancestors, so the latest ancestor is the last

281

# All nodes are ancestors, so the latest ancestor is the last

282

# node.

282

# node.

283

highestrev = self.count() - 1

283

highestrev = self.count() - 1

284

# Set ancestors to None to signal that every node is an ancestor.

284

# Set ancestors to None to signal that every node is an ancestor.

285

ancestors = None

285

ancestors = None

286

# Set heads to an empty dictionary for later discovery of heads

286

# Set heads to an empty dictionary for later discovery of heads

287

heads = {}

287

heads = {}

288

else:

288

else:

289

heads = list(heads)

289

heads = list(heads)

290

if not heads:

290

if not heads:

291

return nonodes

291

return nonodes

292

ancestors = {}

292

ancestors = {}

293

# Start at the top and keep marking parents until we're done.

293

# Start at the top and keep marking parents until we're done.

294

nodestotag = heads[:]

294

nodestotag = heads[:]

295

# Turn heads into a dictionary so we can remove 'fake' heads.

295

# Turn heads into a dictionary so we can remove 'fake' heads.

296

# Also, later we will be using it to filter out the heads we can't

296

# Also, later we will be using it to filter out the heads we can't

297

# find from roots.

297

# find from roots.

298

heads = dict.fromkeys(heads, 0)

298

heads = dict.fromkeys(heads, 0)

299

# Remember where the top was so we can use it as a limit later.

299

# Remember where the top was so we can use it as a limit later.

300

highestrev = max([self.rev(n) for n in nodestotag])

300

highestrev = max([self.rev(n) for n in nodestotag])

301

while nodestotag:

301

while nodestotag:

302

# grab a node to tag

302

# grab a node to tag

303

n = nodestotag.pop()

303

n = nodestotag.pop()

304

# Never tag nullid

304

# Never tag nullid

305

if n == nullid:

305

if n == nullid:

306

continue

306

continue

307

# A node's revision number represents its place in a

307

# A node's revision number represents its place in a

308

# topologically sorted list of nodes.

308

# topologically sorted list of nodes.

309

r = self.rev(n)

309

r = self.rev(n)

310

if r >= lowestrev:

310

if r >= lowestrev:

311

if n not in ancestors:

311

if n not in ancestors:

312

# If we are possibly a descendent of one of the roots

312

# If we are possibly a descendent of one of the roots

313

# and we haven't already been marked as an ancestor

313

# and we haven't already been marked as an ancestor

314

ancestors[n] = 1 # Mark as ancestor

314

ancestors[n] = 1 # Mark as ancestor

315

# Add non-nullid parents to list of nodes to tag.

315

# Add non-nullid parents to list of nodes to tag.

316

nodestotag.extend([p for p in self.parents(n) if

316

nodestotag.extend([p for p in self.parents(n) if

317

p != nullid])

317

p != nullid])

318

elif n in heads: # We've seen it before, is it a fake head?

318

elif n in heads: # We've seen it before, is it a fake head?

319

# So it is, real heads should not be the ancestors of

319

# So it is, real heads should not be the ancestors of

320

# any other heads.

320

# any other heads.

321

heads.pop(n)

321

heads.pop(n)

322

if not ancestors:

322

if not ancestors:

323

return nonodes

323

return nonodes

324

# Now that we have our set of ancestors, we want to remove any

324

# Now that we have our set of ancestors, we want to remove any

325

# roots that are not ancestors.

325

# roots that are not ancestors.

326

327

# If one of the roots was nullid, everything is included anyway.

327

# If one of the roots was nullid, everything is included anyway.

328

if lowestrev > -1:

328

if lowestrev > -1:

329

# But, since we weren't, let's recompute the lowest rev to not

329

# But, since we weren't, let's recompute the lowest rev to not

330

# include roots that aren't ancestors.

330

# include roots that aren't ancestors.

331

332

# Filter out roots that aren't ancestors of heads

332

# Filter out roots that aren't ancestors of heads

333

roots = [n for n in roots if n in ancestors]

333

roots = [n for n in roots if n in ancestors]

334

# Recompute the lowest revision

334

# Recompute the lowest revision

335

if roots:

335

if roots:

336

lowestrev = min([self.rev(n) for n in roots])

336

lowestrev = min([self.rev(n) for n in roots])

337

else:

337

else:

338

# No more roots? Return empty list

338

# No more roots? Return empty list

339

return nonodes

339

return nonodes

340

else:

340

else:

341

# We are descending from nullid, and don't need to care about

341

# We are descending from nullid, and don't need to care about

342

# any other roots.

342

# any other roots.

343

lowestrev = -1

343

lowestrev = -1

344

roots = [nullid]

344

roots = [nullid]

345

# Transform our roots list into a 'set' (i.e. a dictionary where the

345

# Transform our roots list into a 'set' (i.e. a dictionary where the

346

# values don't matter.

346

# values don't matter.

347

descendents = dict.fromkeys(roots, 1)

347

descendents = dict.fromkeys(roots, 1)

348

# Also, keep the original roots so we can filter out roots that aren't

348

# Also, keep the original roots so we can filter out roots that aren't

349

# 'real' roots (i.e. are descended from other roots).

349

# 'real' roots (i.e. are descended from other roots).

350

roots = descendents.copy()

350

roots = descendents.copy()

351

# Our topologically sorted list of output nodes.

351

# Our topologically sorted list of output nodes.

352

orderedout = []

352

orderedout = []

353

# Don't start at nullid since we don't want nullid in our output list,

353

# Don't start at nullid since we don't want nullid in our output list,

354

# and if nullid shows up in descedents, empty parents will look like

354

# and if nullid shows up in descedents, empty parents will look like

355

# they're descendents.

355

# they're descendents.

356

for r in xrange(max(lowestrev, 0), highestrev + 1):

356

for r in xrange(max(lowestrev, 0), highestrev + 1):

357

n = self.node(r)

357

n = self.node(r)

358

isdescendent = False

358

isdescendent = False

359

if lowestrev == -1: # Everybody is a descendent of nullid

359

if lowestrev == -1: # Everybody is a descendent of nullid

360

isdescendent = True

360

isdescendent = True

361

elif n in descendents:

361

elif n in descendents:

362

# n is already a descendent

362

# n is already a descendent

363

isdescendent = True

363

isdescendent = True

364

# This check only needs to be done here because all the roots

364

# This check only needs to be done here because all the roots

365

# will start being marked is descendents before the loop.

365

# will start being marked is descendents before the loop.

366

if n in roots:

366

if n in roots:

367

# If n was a root, check if it's a 'real' root.

367

# If n was a root, check if it's a 'real' root.

368

p = tuple(self.parents(n))

368

p = tuple(self.parents(n))

369

# If any of its parents are descendents, it's not a root.

369

# If any of its parents are descendents, it's not a root.

370

if (p[0] in descendents) or (p[1] in descendents):

370

if (p[0] in descendents) or (p[1] in descendents):

371

roots.pop(n)

371

roots.pop(n)

372

else:

372

else:

373

p = tuple(self.parents(n))

373

p = tuple(self.parents(n))

374

# A node is a descendent if either of its parents are

374

# A node is a descendent if either of its parents are

375

# descendents. (We seeded the dependents list with the roots

375

# descendents. (We seeded the dependents list with the roots

376

# up there, remember?)

376

# up there, remember?)

377

if (p[0] in descendents) or (p[1] in descendents):

377

if (p[0] in descendents) or (p[1] in descendents):

378

descendents[n] = 1

378

descendents[n] = 1

379

isdescendent = True

379

isdescendent = True

380

if isdescendent and ((ancestors is None) or (n in ancestors)):

380

if isdescendent and ((ancestors is None) or (n in ancestors)):

381

# Only include nodes that are both descendents and ancestors.

381

# Only include nodes that are both descendents and ancestors.

382

orderedout.append(n)

382

orderedout.append(n)

383

if (ancestors is not None) and (n in heads):

383

if (ancestors is not None) and (n in heads):

384

# We're trying to figure out which heads are reachable

384

# We're trying to figure out which heads are reachable

385

# from roots.

385

# from roots.

386

# Mark this head as having been reached

386

# Mark this head as having been reached

387

heads[n] = 1

387

heads[n] = 1

388

elif ancestors is None:

388

elif ancestors is None:

389

# Otherwise, we're trying to discover the heads.

389

# Otherwise, we're trying to discover the heads.

390

# Assume this is a head because if it isn't, the next step

390

# Assume this is a head because if it isn't, the next step

391

# will eventually remove it.

391

# will eventually remove it.

392

heads[n] = 1

392

heads[n] = 1

393

# But, obviously its parents aren't.

393

# But, obviously its parents aren't.

394

for p in self.parents(n):

394

for p in self.parents(n):

395

heads.pop(p, None)

395

heads.pop(p, None)

396

heads = [n for n in heads.iterkeys() if heads[n] != 0]

396

heads = [n for n in heads.iterkeys() if heads[n] != 0]

397

roots = roots.keys()

397

roots = roots.keys()

398

assert orderedout

398

assert orderedout

399

assert roots

399

assert roots

400

assert heads

400

assert heads

401

return (orderedout, roots, heads)

401

return (orderedout, roots, heads)

402

403

def heads(self, stop=None):

403

def heads(self, stop=None):

404

"""return the list of all nodes that have no children"""

404

"""return the list of all nodes that have no children"""

405

p = {}

405

p = {}

406

h = []

406

h = []

407

stoprev = 0

407

stoprev = 0

408

if stop and stop in self.nodemap:

408

if stop and stop in self.nodemap:

409

stoprev = self.rev(stop)

409

stoprev = self.rev(stop)

410

411

for r in range(self.count() - 1, -1, -1):

411

for r in range(self.count() - 1, -1, -1):

412

n = self.node(r)

412

n = self.node(r)

413

if n not in p:

413

if n not in p:

414

h.append(n)

414

h.append(n)

415

if n == stop:

415

if n == stop:

416

break

416

break

417

if r < stoprev:

417

if r < stoprev:

418

break

418

break

419

for pn in self.parents(n):

419

for pn in self.parents(n):

420

p[pn] = 1

420

p[pn] = 1

421

return h

421

return h

422

423

def children(self, node):

423

def children(self, node):

424

"""find the children of a given node"""

424

"""find the children of a given node"""

425

c = []

425

c = []

426

p = self.rev(node)

426

p = self.rev(node)

427

for r in range(p + 1, self.count()):

427

for r in range(p + 1, self.count()):

428

n = self.node(r)

428

n = self.node(r)

429

for pn in self.parents(n):

429

for pn in self.parents(n):

430

if pn == node:

430

if pn == node:

431

c.append(n)

431

c.append(n)

432

continue

432

continue

433

elif pn == nullid:

433

elif pn == nullid:

434

continue

434

continue

435

return c

435

return c

436

437

def lookup(self, id):

437

def lookup(self, id):

438

"""locate a node based on revision number or subset of hex nodeid"""

438

"""locate a node based on revision number or subset of hex nodeid"""

439

try:

439

try:

440

rev = int(id)

440

rev = int(id)

441

if str(rev) != id: raise ValueError

441

if str(rev) != id: raise ValueError

442

if rev < 0: rev = self.count() + rev

442

if rev < 0: rev = self.count() + rev

443

if rev < 0 or rev >= self.count(): raise ValueError

443

if rev < 0 or rev >= self.count(): raise ValueError

444

return self.node(rev)

444

return self.node(rev)

445

except (ValueError, OverflowError):

445

except (ValueError, OverflowError):

446

c = []

446

c = []

447

for n in self.nodemap:

447

for n in self.nodemap:

448

if hex(n).startswith(id):

448

if hex(n).startswith(id):

449

c.append(n)

449

c.append(n)

450

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

450

if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))

451

if len(c) < 1: raise RevlogError(_("No match found"))

451

if len(c) < 1: raise RevlogError(_("No match found"))

452

return c[0]

452

return c[0]

453

454

return None

454

return None

455

456

def diff(self, a, b):

456

def diff(self, a, b):

457

"""return a delta between two revisions"""

457

"""return a delta between two revisions"""

458

return mdiff.textdiff(a, b)

458

return mdiff.textdiff(a, b)

459

460

def patches(self, t, pl):

460

def patches(self, t, pl):

461

"""apply a list of patches to a string"""

461

"""apply a list of patches to a string"""

462

return mdiff.patches(t, pl)

462

return mdiff.patches(t, pl)

463

464

def delta(self, node):

464

def delta(self, node):

465

"""return or calculate a delta between a node and its predecessor"""

465

"""return or calculate a delta between a node and its predecessor"""

466

r = self.rev(node)

466

r = self.rev(node)

467

b = self.base(r)

467

b = self.base(r)

468

if r == b:

468

if r == b:

469

return self.diff(self.revision(self.node(r - 1)),

469

return self.diff(self.revision(self.node(r - 1)),

470

self.revision(node))

470

self.revision(node))

471

else:

471

else:

472

f = self.opener(self.datafile)

472

f = self.opener(self.datafile)

473

f.seek(self.start(r))

473

f.seek(self.start(r))

474

data = f.read(self.length(r))

474

data = f.read(self.length(r))

475

return decompress(data)

475

return decompress(data)

476

477

def revision(self, node):

477

def revision(self, node):

478

"""return an uncompressed revision of a given"""

478

"""return an uncompressed revision of a given"""

479

if node == nullid: return ""

479

if node == nullid: return ""

480

if self.cache and self.cache[0] == node: return self.cache[2]

480

if self.cache and self.cache[0] == node: return self.cache[2]

481

482

# look up what we need to read

482

# look up what we need to read

483

text = None

483

text = None

484

rev = self.rev(node)

484

rev = self.rev(node)

485

start, length, base, link, p1, p2, node = self.index[rev]

485

start, length, base, link, p1, p2, node = self.index[rev]

486

end = start + length

486

end = start + length

487

if base != rev: start = self.start(base)

487

if base != rev: start = self.start(base)

488

489

# do we have useful data cached?

489

# do we have useful data cached?

490

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

490

if self.cache and self.cache[1] >= base and self.cache[1] < rev:

491

base = self.cache[1]

491

base = self.cache[1]

492

start = self.start(base + 1)

492

start = self.start(base + 1)

493

text = self.cache[2]

493

text = self.cache[2]

494

last = 0

494

last = 0

495

496

f = self.opener(self.datafile)

496

f = self.opener(self.datafile)

497

f.seek(start)

497

f.seek(start)

498

data = f.read(end - start)

498

data = f.read(end - start)

499

500

if text is None:

500

if text is None:

501

last = self.length(base)

501

last = self.length(base)

502

text = decompress(data[:last])

502

text = decompress(data[:last])

503

504

bins = []

504

bins = []

505

for r in xrange(base + 1, rev + 1):

505

for r in xrange(base + 1, rev + 1):

506

s = self.length(r)

506

s = self.length(r)

507

bins.append(decompress(data[last:last + s]))

507

bins.append(decompress(data[last:last + s]))

508

last = last + s

508

last = last + s

509

510

text = mdiff.patches(text, bins)

510

text = mdiff.patches(text, bins)

511

512

if node != hash(text, p1, p2):

512

if node != hash(text, p1, p2):

513

raise RevlogError(_("integrity check failed on %s:%d")

513

raise RevlogError(_("integrity check failed on %s:%d")

514

% (self.datafile, rev))

514

% (self.datafile, rev))

515

516

self.cache = (node, rev, text)

516

self.cache = (node, rev, text)

517

return text

517

return text

518

519

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

519

def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):

520

"""add a revision to the log

520

"""add a revision to the log

521

522

text - the revision data to add

522

text - the revision data to add

523

transaction - the transaction object used for rollback

523

transaction - the transaction object used for rollback

524

link - the linkrev data to add

524

link - the linkrev data to add

525

p1, p2 - the parent nodeids of the revision

525

p1, p2 - the parent nodeids of the revision

526

d - an optional precomputed delta

526

d - an optional precomputed delta

527

"""

527

"""

528

if text is None: text = ""

528

if text is None: text = ""

529

if p1 is None: p1 = self.tip()

529

if p1 is None: p1 = self.tip()

530

if p2 is None: p2 = nullid

530

if p2 is None: p2 = nullid

531

532

node = hash(text, p1, p2)

532

node = hash(text, p1, p2)

533

534

if node in self.nodemap:

534

if node in self.nodemap:

535

return node

535

return node

536

537

n = self.count()

537

n = self.count()

538

t = n - 1

538

t = n - 1

539

540

if n:

540

if n:

541

base = self.base(t)

541

base = self.base(t)

542

start = self.start(base)

542

start = self.start(base)

543

end = self.end(t)

543

end = self.end(t)

544

if not d:

544

if not d:

545

prev = self.revision(self.tip())

545

prev = self.revision(self.tip())

546

d = self.diff(prev, text)

546

d = self.diff(prev, text)

547

data = compress(d)

547

data = compress(d)

548

dist = end - start + len(data)

548

dist = end - start + len(data)

549

550

# full versions are inserted when the needed deltas

550

# full versions are inserted when the needed deltas

551

# become comparable to the uncompressed text

551

# become comparable to the uncompressed text

552

if not n or dist > len(text) * 2:

552

if not n or dist > len(text) * 2:

553

data = compress(text)

553

data = compress(text)

554

base = n

554

base = n

555

else:

555

else:

556

base = self.base(t)

556

base = self.base(t)

557

558

offset = 0

558

offset = 0

559

if t >= 0:

559

if t >= 0:

560

offset = self.end(t)

560

offset = self.end(t)

561

562

e = (offset, len(data), base, link, p1, p2, node)

562

e = (offset, len(data), base, link, p1, p2, node)

563

564

self.index.append(e)

564

self.index.append(e)

565

self.nodemap[node] = n

565

self.nodemap[node] = n

566

entry = struct.pack(indexformat, *e)

566

entry = struct.pack(indexformat, *e)

567

568

transaction.add(self.datafile, e[0])

568

transaction.add(self.datafile, e[0])

569

self.opener(self.datafile, "a").write(data)

569

self.opener(self.datafile, "a").write(data)

570

transaction.add(self.indexfile, n * len(entry))

570

transaction.add(self.indexfile, n * len(entry))

571

self.opener(self.indexfile, "a").write(entry)

571

self.opener(self.indexfile, "a").write(entry)

572

573

self.cache = (node, n, text)

573

self.cache = (node, n, text)

574

return node

574

return node

575

576

def ancestor(self, a, b):

576

def ancestor(self, a, b):

577

"""calculate the least common ancestor of nodes a and b"""

577

"""calculate the least common ancestor of nodes a and b"""

578

# calculate the distance of every node from root

578

# calculate the distance of every node from root

579

dist = {nullid: 0}

579

dist = {nullid: 0}

580

for i in xrange(self.count()):

580

for i in xrange(self.count()):

581

n = self.node(i)

581

n = self.node(i)

582

p1, p2 = self.parents(n)

582

p1, p2 = self.parents(n)

583

dist[n] = max(dist[p1], dist[p2]) + 1

583

dist[n] = max(dist[p1], dist[p2]) + 1

584

585

# traverse ancestors in order of decreasing distance from root

585

# traverse ancestors in order of decreasing distance from root

586

def ancestors(node):

586

def ancestors(node):

587

# we store negative distances because heap returns smallest member

587

# we store negative distances because heap returns smallest member

588

h = [(-dist[node], node)]

588

h = [(-dist[node], node)]

589

seen = {}

589

seen = {}

590

earliest = self.count()

590

earliest = self.count()

591

while h:

591

while h:

592

d, n = heapq.heappop(h)

592

d, n = heapq.heappop(h)

593

if n not in seen:

593

if n not in seen:

594

seen[n] = 1

594

seen[n] = 1

595

r = self.rev(n)

595

r = self.rev(n)

596

yield (-d, n)

596

yield (-d, n)

597

for p in self.parents(n):

597

for p in self.parents(n):

598

heapq.heappush(h, (-dist[p], p))

598

heapq.heappush(h, (-dist[p], p))

599

600

def generations(node):

600

def generations(node):

601

sg, s = None, {}

601

sg, s = None, {}

602

for g,n in ancestors(node):

602

for g,n in ancestors(node):

603

if g != sg:

603

if g != sg:

604

if sg:

604

if sg:

605

yield sg, s

605

yield sg, s

606

sg, s = g, {n:1}

606

sg, s = g, {n:1}

607

else:

607

else:

608

s[n] = 1

608

s[n] = 1

609

yield sg, s

609

yield sg, s

610

611

x = generations(a)

611

x = generations(a)

612

y = generations(b)

612

y = generations(b)

613

gx = x.next()

613

gx = x.next()

614

gy = y.next()

614

gy = y.next()

615

616

# increment each ancestor list until it is closer to root than

616

# increment each ancestor list until it is closer to root than

617

# the other, or they match

617

# the other, or they match

618

while 1:

618

while 1:

619

#print "ancestor gen %s %s" % (gx[0], gy[0])

619

#print "ancestor gen %s %s" % (gx[0], gy[0])

620

if gx[0] == gy[0]:

620

if gx[0] == gy[0]:

621

# find the intersection

621

# find the intersection

622

i = [ n for n in gx[1] if n in gy[1] ]

622

i = [ n for n in gx[1] if n in gy[1] ]

623

if i:

623

if i:

624

return i[0]

624

return i[0]

625

else:

625

else:

626

#print "next"

626

#print "next"

627

gy = y.next()

627

gy = y.next()

628

gx = x.next()

628

gx = x.next()

629

elif gx[0] < gy[0]:

629

elif gx[0] < gy[0]:

630

#print "next y"

630

#print "next y"

631

gy = y.next()

631

gy = y.next()

632

else:

632

else:

633

#print "next x"

633

#print "next x"

634

gx = x.next()

634

gx = x.next()

635

636

def group(self, nodelist, lookup, infocollect = None):

636

def group(self, nodelist, lookup, infocollect = None):

637

"""calculate a delta group

637

"""calculate a delta group

638

639

Given a list of changeset revs, return a set of deltas and

639

Given a list of changeset revs, return a set of deltas and

640

metadata corresponding to nodes. the first delta is

640

metadata corresponding to nodes. the first delta is

641

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

641

parent(nodes[0]) -> nodes[0] the receiver is guaranteed to

642

have this parent as it has all history before these

642

have this parent as it has all history before these

643

changesets. parent is parent[0]

643

changesets. parent is parent[0]

644

"""

644

"""

645

revs = [self.rev(n) for n in nodelist]

645

revs = [self.rev(n) for n in nodelist]

646

needed = dict.fromkeys(revs, 1)

646

needed = dict.fromkeys(revs, 1)

647

648

# if we don't have any revisions touched by these changesets, bail

648

# if we don't have any revisions touched by these changesets, bail

649

if not revs:

649

if not revs:

650

yield struct.pack(">l", 0)

650

yield struct.pack(">l", 0)

651

return

651

return

652

653

# add the parent of the first rev

653

# add the parent of the first rev

654

p = self.parents(self.node(revs[0]))[0]

654

p = self.parents(self.node(revs[0]))[0]

655

revs.insert(0, self.rev(p))

655

revs.insert(0, self.rev(p))

656

657

# for each delta that isn't contiguous in the log, we need to

657

# for each delta that isn't contiguous in the log, we need to

658

# reconstruct the base, reconstruct the result, and then

658

# reconstruct the base, reconstruct the result, and then

659

# calculate the delta. We also need to do this where we've

659

# calculate the delta. We also need to do this where we've

660

# stored a full version and not a delta

660

# stored a full version and not a delta

661

for i in xrange(0, len(revs) - 1):

661

for i in xrange(0, len(revs) - 1):

662

a, b = revs[i], revs[i + 1]

662

a, b = revs[i], revs[i + 1]

663

if a + 1 != b or self.base(b) == b:

663

if a + 1 != b or self.base(b) == b:

664

for j in xrange(self.base(a), a + 1):

664

for j in xrange(self.base(a), a + 1):

665

needed[j] = 1

665

needed[j] = 1

666

for j in xrange(self.base(b), b + 1):

666

for j in xrange(self.base(b), b + 1):

667

needed[j] = 1

667

needed[j] = 1

668

669

# calculate spans to retrieve from datafile

669

# calculate spans to retrieve from datafile

670

needed = needed.keys()

670

needed = needed.keys()

671

needed.sort()

671

needed.sort()

672

spans = []

672

spans = []

673

oo = -1

673

oo = -1

674

ol = 0

674

ol = 0

675

for n in needed:

675

for n in needed:

676

if n < 0: continue

676

if n < 0: continue

677

o = self.start(n)

677

o = self.start(n)

678

l = self.length(n)

678

l = self.length(n)

679

if oo + ol == o: # can we merge with the previous?

679

if oo + ol == o: # can we merge with the previous?

680

nl = spans[-1][2]

680

nl = spans[-1][2]

681

nl.append((n, l))

681

nl.append((n, l))

682

ol += l

682

ol += l

683

spans[-1] = (oo, ol, nl)

683

spans[-1] = (oo, ol, nl)

684

else:

684

else:

685

oo = o

685

oo = o

686

ol = l

686

ol = l

687

spans.append((oo, ol, [(n, l)]))

687

spans.append((oo, ol, [(n, l)]))

688

689

# read spans in, divide up chunks

689

# read spans in, divide up chunks

690

chunks = {}

690

chunks = {}

691

for span in spans:

691

for span in spans:

692

# we reopen the file for each span to make http happy for now

692

# we reopen the file for each span to make http happy for now

693

f = self.opener(self.datafile)

693

f = self.opener(self.datafile)

694

f.seek(span[0])

694

f.seek(span[0])

695

data = f.read(span[1])

695

data = f.read(span[1])

696

697

# divide up the span

697

# divide up the span

698

pos = 0

698

pos = 0

699

for r, l in span[2]:

699

for r, l in span[2]:

700

chunks[r] = decompress(data[pos: pos + l])

700

chunks[r] = decompress(data[pos: pos + l])

701

pos += l

701

pos += l

702

703

# helper to reconstruct intermediate versions

703

# helper to reconstruct intermediate versions

704

def construct(text, base, rev):

704

def construct(text, base, rev):

705

bins = [chunks[r] for r in xrange(base + 1, rev + 1)]

705

bins = [chunks[r] for r in xrange(base + 1, rev + 1)]

706

return mdiff.patches(text, bins)

706

return mdiff.patches(text, bins)

707

708

# build deltas

708

# build deltas

709

deltas = []

709

deltas = []

710

for d in xrange(0, len(revs) - 1):

710

for d in xrange(0, len(revs) - 1):

711

a, b = revs[d], revs[d + 1]

711

a, b = revs[d], revs[d + 1]

712

n = self.node(b)

712

n = self.node(b)

713

714

if infocollect is not None:

714

if infocollect is not None:

715

infocollect(n)

715

infocollect(n)

716

717

# do we need to construct a new delta?

717

# do we need to construct a new delta?

718

if a + 1 != b or self.base(b) == b:

718

if a + 1 != b or self.base(b) == b:

719

if a >= 0:

719

if a >= 0:

720

base = self.base(a)

720

base = self.base(a)

721

ta = chunks[self.base(a)]

721

ta = chunks[self.base(a)]

722

ta = construct(ta, base, a)

722

ta = construct(ta, base, a)

723

else:

723

else:

724

ta = ""

724

ta = ""

725

726

base = self.base(b)

726

base = self.base(b)

727

if a > base:

727

if a > base:

728

base = a

728

base = a

729

tb = ta

729

tb = ta

730

else:

730

else:

731

tb = chunks[self.base(b)]

731

tb = chunks[self.base(b)]

732

tb = construct(tb, base, b)

732

tb = construct(tb, base, b)

733

d = self.diff(ta, tb)

733

d = self.diff(ta, tb)

734

else:

734

else:

735

d = chunks[b]

735

d = chunks[b]

736

737

p = self.parents(n)

737

p = self.parents(n)

738

meta = n + p[0] + p[1] + lookup(n)

738

meta = n + p[0] + p[1] + lookup(n)

739

l = struct.pack(">l", len(meta) + len(d) + 4)

739

l = struct.pack(">l", len(meta) + len(d) + 4)

740

yield l

740

yield l

741

yield meta

741

yield meta

742

yield d

742

yield d

743

744

yield struct.pack(">l", 0)

744

yield struct.pack(">l", 0)

745

746

def addgroup(self, revs, linkmapper, transaction, unique=0):

746

def addgroup(self, revs, linkmapper, transaction, unique=0):

747

"""

747

"""

748

add a delta group

748

add a delta group

749

750

given a set of deltas, add them to the revision log. the

750

given a set of deltas, add them to the revision log. the

751

first delta is against its parent, which should be in our

751

first delta is against its parent, which should be in our

752

log, the rest are against the previous delta.

752

log, the rest are against the previous delta.

753

"""

753

"""

754

755

#track the base of the current delta log

755

#track the base of the current delta log

756

r = self.count()

756

r = self.count()

757

t = r - 1

757

t = r - 1

758

node = nullid

758

node = nullid

759

760

base = prev = -1

760

base = prev = -1

761

start = end = measure = 0

761

start = end = measure = 0

762

if r:

762

if r:

763

start = self.start(self.base(t))

763

start = self.start(self.base(t))

764

end = self.end(t)

764

end = self.end(t)

765

measure = self.length(self.base(t))

765

measure = self.length(self.base(t))

766

base = self.base(t)

766

base = self.base(t)

767

prev = self.tip()

767

prev = self.tip()

768

769

transaction.add(self.datafile, end)

769

transaction.add(self.datafile, end)

770

transaction.add(self.indexfile, r * struct.calcsize(indexformat))

770

transaction.add(self.indexfile, r * struct.calcsize(indexformat))

771

dfh = self.opener(self.datafile, "a")

771

dfh = self.opener(self.datafile, "a")

772

ifh = self.opener(self.indexfile, "a")

772

ifh = self.opener(self.indexfile, "a")

773

774

# loop through our set of deltas

774

# loop through our set of deltas

775

chain = None

775

chain = None

776

for chunk in revs:

776

for chunk in revs:

777

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

777

node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])

778

link = linkmapper(cs)

778

link = linkmapper(cs)

779

if node in self.nodemap:

779

if node in self.nodemap:

780

# this can happen if two branches make the same change

780

# this can happen if two branches make the same change

781

# if unique:

781

# if unique:

782

# raise RevlogError(_("already have %s") % hex(node[:4]))

782

# raise RevlogError(_("already have %s") % hex(node[:4]))

783

chain = node

783

chain = node

784

continue

784

continue

785

delta = chunk[80:]

785

delta = chunk[80:]

786

787

if not chain:

787

if not chain:

788

# retrieve the parent revision of the delta chain

788

# retrieve the parent revision of the delta chain

789

chain = p1

789

chain = p1

790

if not chain in self.nodemap:

790

if not chain in self.nodemap:

791

raise RevlogError(_("unknown base %s") % short(chain[:4]))

791

raise RevlogError(_("unknown base %s") % short(chain[:4]))

792

793

# full versions are inserted when the needed deltas become

793

# full versions are inserted when the needed deltas become

794

# comparable to the uncompressed text or when the previous

794

# comparable to the uncompressed text or when the previous

795

# version is not the one we have a delta against. We use

795

# version is not the one we have a delta against. We use

796

# the size of the previous full rev as a proxy for the

796

# the size of the previous full rev as a proxy for the

797

# current size.

797

# current size.

798

799

if chain == prev:

799

if chain == prev:

800

cdelta = compress(delta)

800

cdelta = compress(delta)

801

802

if chain != prev or (end - start + len(cdelta)) > measure * 2:

802

if chain != prev or (end - start + len(cdelta)) > measure * 2:

803

# flush our writes here so we can read it in revision

803

# flush our writes here so we can read it in revision

804

dfh.flush()

804

dfh.flush()

805

ifh.flush()

805

ifh.flush()

806

text = self.revision(chain)

806

text = self.revision(chain)

807

text = self.patches(text, [delta])

807

text = self.patches(text, [delta])

808

chk = self.addrevision(text, transaction, link, p1, p2)

808

chk = self.addrevision(text, transaction, link, p1, p2)

809

if chk != node:

809

if chk != node:

810

raise RevlogError(_("consistency error adding group"))

810

raise RevlogError(_("consistency error adding group"))

811

measure = len(text)

811

measure = len(text)

812

else:

812

else:

813

e = (end, len(cdelta), self.base(t), link, p1, p2, node)

813

e = (end, len(cdelta), self.base(t), link, p1, p2, node)

814

self.index.append(e)

814

self.index.append(e)

815

self.nodemap[node] = r

815

self.nodemap[node] = r

816

dfh.write(cdelta)

816

dfh.write(cdelta)

817

ifh.write(struct.pack(indexformat, *e))

817

ifh.write(struct.pack(indexformat, *e))

818

819

t, r, chain, prev = r, r + 1, node, node

819

t, r, chain, prev = r, r + 1, node, node

820

start = self.start(self.base(t))

820

start = self.start(self.base(t))

821

end = self.end(t)

821

end = self.end(t)

822

823

dfh.close()

823

dfh.close()

824

ifh.close()

824

ifh.close()

825

return node

825

return node

826

827

def checksize(self):

827

def checksize(self):

828

expected = 0

828

expected = 0

829

if self.count():

829

if self.count():

830

expected = self.end(self.count() - 1)

830

expected = self.end(self.count() - 1)

831

try:

831

f = self.opener(self.datafile)

832

f = self.opener(self.datafile)

832

f.seek(0, 2)

833

f.seek(0, 2)

833

actual = f.tell()

834

actual = f.tell()

834

return expected - actual

835

return expected - actual

836

except IOError, inst:

837

if inst.errno == errno.ENOENT:

838

return 0

839

raise

840

841

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """
             revlog.py - storage back-end for mercurial
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches
             Copyright 2005 Matt Mackall <mpm@selenic.com>
             This software may be used and distributed according to the terms
             of the GNU General Public License, incorporated herein by reference.
             """
             from node import *
             from i18n import gettext as _
             from demandload import demandload
             demandload(globals(), "binascii errno heapq mdiff sha struct zlib")
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 l = [p1, p2]
                 l.sort()
                 s = sha.new(l[0])
                 s.update(l[1])
                 s.update(text)
                 return s.digest()
             def compress(text):
                 """ generate a possibly-compressed representation of text """
                 if not text: return text
                 if len(text) < 44:
                     if text[0] == '\0': return text
                     return 'u' + text
                 bin = zlib.compress(text)
                 if len(bin) > len(text):
                     if text[0] == '\0': return text
                     return 'u' + text
                 return bin
             def decompress(bin):
                 """ decompress the given input """
                 if not bin: return bin
                 t = bin[0]
                 if t == '\0': return bin
                 if t == 'x': return zlib.decompress(bin)
                 if t == 'u': return bin[1:]
                 raise RevlogError(_("unknown compression type %s") % t)
             indexformat = ">4l20s20s20s"
             class lazyparser:
                 """
                 this class avoids the need to parse the entirety of large indices
                 By default we parse and load 1000 entries at a time.
                 If no position is specified, we load the whole index, and replace
                 the lazy objects in revlog with the underlying objects for
                 efficiency in cases where we look at most of the nodes.
                 """
                 def __init__(self, data, revlog):
                     self.data = data
                     self.s = struct.calcsize(indexformat)
                     self.l = len(data)/self.s
                     self.index = [None] * self.l
                     self.map = {nullid: -1}
                     self.all = 0
                     self.revlog = revlog
                 def load(self, pos=None):
                     if self.all: return
                     if pos is not None:
                         block = pos / 1000
                         i = block * 1000
                         end = min(self.l, i + 1000)
                     else:
                         self.all = 1
                         i = 0
                         end = self.l
                         self.revlog.index = self.index
                         self.revlog.nodemap = self.map
                     while i < end:
                         d = self.data[i * self.s: (i + 1) * self.s]
                         e = struct.unpack(indexformat, d)
                         self.index[i] = e
                         self.map[e[6]] = i
                         i += 1
             class lazyindex:
                 """a lazy version of the index array"""
                 def __init__(self, parser):
                     self.p = parser
                 def __len__(self):
                     return len(self.p.index)
                 def load(self, pos):
                     if pos < 0:
                         pos += len(self.p.index)
                     self.p.load(pos)
                     return self.p.index[pos]
                 def __getitem__(self, pos):
                     return self.p.index[pos] or self.load(pos)
                 def append(self, e):
                     self.p.index.append(e)
             class lazymap:
                 """a lazy version of the node map"""
                 def __init__(self, parser):
                     self.p = parser
                 def load(self, key):
                     if self.p.all: return
                     n = self.p.data.find(key)
                     if n < 0:
                         raise KeyError(key)
                     pos = n / self.p.s
                     self.p.load(pos)
                 def __contains__(self, key):
                     self.p.load()
                     return key in self.p.map
                 def __iter__(self):
                     yield nullid
                     for i in xrange(self.p.l):
                         try:
                             yield self.p.index[i][6]
                         except:
                             self.p.load(i)
                             yield self.p.index[i][6]
                 def __getitem__(self, key):
                     try:
                         return self.p.map[key]
                     except KeyError:
                         try:
                             self.load(key)
                             return self.p.map[key]
                         except KeyError:
                             raise KeyError("node " + hex(key))
                 def __setitem__(self, key, val):
                     self.p.map[key] = val
             class RevlogError(Exception): pass
             class revlog:
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, includings its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 """
                 def __init__(self, opener, indexfile, datafile):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = datafile
                     self.opener = opener
                     self.cache = None
                     try:
                         i = self.opener(self.indexfile).read()
                     except IOError, inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         i = ""
                     if len(i) > 10000:
                         # big index, let's parse it on demand
                         parser = lazyparser(i, self)
                         self.index = lazyindex(parser)
                         self.nodemap = lazymap(parser)
                     else:
                         s = struct.calcsize(indexformat)
                         l = len(i) / s
                         self.index = [None] * l
                         m = [None] * l
                         n = 0
                         for f in xrange(0, len(i), s):
                             # offset, size, base, linkrev, p1, p2, nodeid
                             e = struct.unpack(indexformat, i[f:f + s])
                             m[n] = (e[6], n)
                             self.index[n] = e
                             n += 1
                         self.nodemap = dict(m)
                         self.nodemap[nullid] = -1
                 def tip(self): return self.node(len(self.index) - 1)
                 def count(self): return len(self.index)
                 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
                 def rev(self, node):
                     try:
                         return self.nodemap[node]
                     except KeyError:
                         raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
                 def linkrev(self, node): return self.index[self.rev(node)][3]
                 def parents(self, node):
                     if node == nullid: return (nullid, nullid)
                     return self.index[self.rev(node)][4:6]
                 def start(self, rev): return self.index[rev][0]
                 def length(self, rev): return self.index[rev][1]
                 def end(self, rev): return self.start(rev) + self.length(rev)
                 def base(self, rev): return self.index[rev][2]
                 def reachable(self, rev, stop=None):
                     reachable = {}
                     visit = [rev]
                     reachable[rev] = 1
                     if stop:
                         stopn = self.rev(stop)
                     else:
                         stopn = 0
                     while visit:
                         n = visit.pop(0)
                         if n == stop:
                             continue
                         if n == nullid:
                             continue
                         for p in self.parents(n):
                             if self.rev(p) < stopn:
                                 continue
                             if p not in reachable:
                                 reachable[p] = 1
                                 visit.append(p)
                     return reachable
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a tuple containing three elements. Elements 1 and 2 contain
                     a final list bases and heads after all the unreachable ones have been
                     pruned.  Element 0 contains a topologically sorted list of all
                     nodes that satisfy these constraints:
 . All nodes must be descended from a node in roots (the nodes on
                        roots are considered descended from themselves).
 . All nodes must also be ancestors of a node in heads (the nodes in
                        heads are considered to be their own ancestors).
                     If roots is unspecified, nullid is assumed as the only root.
                     If heads is unspecified, it is taken to be the output of the
                     heads method (i.e. a list of all nodes in the repository that
                     have no children)."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendent of nullid
                         lowestrev = -1
                     if (lowestrev == -1) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in xrange(0, self.count())],
                                 [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = self.count() - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = {}
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = heads[:]
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, 0)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendent of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors[n] = 1 # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.extend([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > -1:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [n for n in roots if n in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(n) for n in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = -1
                             roots = [nullid]
                     # Transform our roots list into a 'set' (i.e. a dictionary where the
                     # values don't matter.
                     descendents = dict.fromkeys(roots, 1)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendents.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descedents, empty parents will look like
                     # they're descendents.
                     for r in xrange(max(lowestrev, 0), highestrev + 1):
                         n = self.node(r)
                         isdescendent = False
                         if lowestrev == -1:  # Everybody is a descendent of nullid
                             isdescendent = True
                         elif n in descendents:
                             # n is already a descendent
                             isdescendent = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendents before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendents, it's not a root.
                                 if (p[0] in descendents) or (p[1] in descendents):
                                     roots.pop(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendent if either of its parents are
                             # descendents.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendents) or (p[1] in descendents):
                                 descendents[n] = 1
                                 isdescendent = True
                         if isdescendent and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendents and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = 1
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = 1
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [n for n in heads.iterkeys() if heads[n] != 0]
                     roots = roots.keys()
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def heads(self, stop=None):
                     """return the list of all nodes that have no children"""
                     p = {}
                     h = []
                     stoprev = 0
                     if stop and stop in self.nodemap:
                         stoprev = self.rev(stop)
                     for r in range(self.count() - 1, -1, -1):
                         n = self.node(r)
                         if n not in p:
                             h.append(n)
                         if n == stop:
                             break
                         if r < stoprev:
                             break
                         for pn in self.parents(n):
                             p[pn] = 1
                     return h
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in range(p + 1, self.count()):
                         n = self.node(r)
                         for pn in self.parents(n):
                             if pn == node:
                                 c.append(n)
                                 continue
                             elif pn == nullid:
                                 continue
                     return c
                 def lookup(self, id):
                     """locate a node based on revision number or subset of hex nodeid"""
                     try:
                         rev = int(id)
                         if str(rev) != id: raise ValueError
                         if rev < 0: rev = self.count() + rev
                         if rev < 0 or rev >= self.count(): raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         c = []
                         for n in self.nodemap:
                             if hex(n).startswith(id):
                                 c.append(n)
                         if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
                         if len(c) < 1: raise RevlogError(_("No match found"))
                         return c[0]
                     return None
                 def diff(self, a, b):
                     """return a delta between two revisions"""
                     return mdiff.textdiff(a, b)
                 def patches(self, t, pl):
                     """apply a list of patches to a string"""
                     return mdiff.patches(t, pl)
                 def delta(self, node):
                     """return or calculate a delta between a node and its predecessor"""
                     r = self.rev(node)
                     b = self.base(r)
                     if r == b:
                         return self.diff(self.revision(self.node(r - 1)),
                                          self.revision(node))
                     else:
                         f = self.opener(self.datafile)
                         f.seek(self.start(r))
                         data = f.read(self.length(r))
                     return decompress(data)
                 def revision(self, node):
                     """return an uncompressed revision of a given"""
                     if node == nullid: return ""
                     if self.cache and self.cache[0] == node: return self.cache[2]
                     # look up what we need to read
                     text = None
                     rev = self.rev(node)
                     start, length, base, link, p1, p2, node = self.index[rev]
                     end = start + length
                     if base != rev: start = self.start(base)
                     # do we have useful data cached?
                     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
                         base = self.cache[1]
                         start = self.start(base + 1)
                         text = self.cache[2]
                         last = 0
                     f = self.opener(self.datafile)
                     f.seek(start)
                     data = f.read(end - start)
                     if text is None:
                         last = self.length(base)
                         text = decompress(data[:last])
                     bins = []
                     for r in xrange(base + 1, rev + 1):
                         s = self.length(r)
                         bins.append(decompress(data[last:last + s]))
                         last = last + s
                     text = mdiff.patches(text, bins)
                     if node != hash(text, p1, p2):
                         raise RevlogError(_("integrity check failed on %s:%d")
                                       % (self.datafile, rev))
                     self.cache = (node, rev, text)
                     return text
                 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     d - an optional precomputed delta
                     """
                     if text is None: text = ""
                     if p1 is None: p1 = self.tip()
                     if p2 is None: p2 = nullid
                     node = hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     n = self.count()
                     t = n - 1
                     if n:
                         base = self.base(t)
                         start = self.start(base)
                         end = self.end(t)
                         if not d:
                             prev = self.revision(self.tip())
                             d = self.diff(prev, text)
                         data = compress(d)
                         dist = end - start + len(data)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if not n or dist > len(text) * 2:
                         data = compress(text)
                         base = n
                     else:
                         base = self.base(t)
                     offset = 0
                     if t >= 0:
                         offset = self.end(t)
                     e = (offset, len(data), base, link, p1, p2, node)
                     self.index.append(e)
                     self.nodemap[node] = n
                     entry = struct.pack(indexformat, *e)
                     transaction.add(self.datafile, e[0])
                     self.opener(self.datafile, "a").write(data)
                     transaction.add(self.indexfile, n * len(entry))
                     self.opener(self.indexfile, "a").write(entry)
                     self.cache = (node, n, text)
                     return node
                 def ancestor(self, a, b):
                     """calculate the least common ancestor of nodes a and b"""
                     # calculate the distance of every node from root
                     dist = {nullid: 0}
                     for i in xrange(self.count()):
                         n = self.node(i)
                         p1, p2 = self.parents(n)
                         dist[n] = max(dist[p1], dist[p2]) + 1
                     # traverse ancestors in order of decreasing distance from root
                     def ancestors(node):
                         # we store negative distances because heap returns smallest member
                         h = [(-dist[node], node)]
                         seen = {}
                         earliest = self.count()
                         while h:
                             d, n = heapq.heappop(h)
                             if n not in seen:
                                 seen[n] = 1
                                 r = self.rev(n)
                                 yield (-d, n)
                                 for p in self.parents(n):
                                     heapq.heappush(h, (-dist[p], p))
                     def generations(node):
                         sg, s = None, {}
                         for g,n in ancestors(node):
                             if g != sg:
                                 if sg:
                                     yield sg, s
                                 sg, s = g, {n:1}
                             else:
                                 s[n] = 1
                         yield sg, s
                     x = generations(a)
                     y = generations(b)
                     gx = x.next()
                     gy = y.next()
                     # increment each ancestor list until it is closer to root than
                     # the other, or they match
                     while 1:
                         #print "ancestor gen %s %s" % (gx[0], gy[0])
                         if gx[0] == gy[0]:
                             # find the intersection
                             i = [ n for n in gx[1] if n in gy[1] ]
                             if i:
                                 return i[0]
                             else:
                                 #print "next"
                                 gy = y.next()
                                 gx = x.next()
                         elif gx[0] < gy[0]:
                             #print "next y"
                             gy = y.next()
                         else:
                             #print "next x"
                             gx = x.next()
                 def group(self, nodelist, lookup, infocollect = None):
                     """calculate a delta group
                     Given a list of changeset revs, return a set of deltas and
                     metadata corresponding to nodes. the first delta is
                     parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
                     have this parent as it has all history before these
                     changesets. parent is parent[0]
                     """
                     revs = [self.rev(n) for n in nodelist]
                     needed = dict.fromkeys(revs, 1)
                     # if we don't have any revisions touched by these changesets, bail
                     if not revs:
                         yield struct.pack(">l", 0)
                         return
                     # add the parent of the first rev
                     p = self.parents(self.node(revs[0]))[0]
                     revs.insert(0, self.rev(p))
                     # for each delta that isn't contiguous in the log, we need to
                     # reconstruct the base, reconstruct the result, and then
                     # calculate the delta. We also need to do this where we've
                     # stored a full version and not a delta
                     for i in xrange(0, len(revs) - 1):
                         a, b = revs[i], revs[i + 1]
                         if a + 1 != b or self.base(b) == b:
                             for j in xrange(self.base(a), a + 1):
                                 needed[j] = 1
                             for j in xrange(self.base(b), b + 1):
                                 needed[j] = 1
                     # calculate spans to retrieve from datafile
                     needed = needed.keys()
                     needed.sort()
                     spans = []
                     oo = -1
                     ol = 0
                     for n in needed:
                         if n < 0: continue
                         o = self.start(n)
                         l = self.length(n)
                         if oo + ol == o: # can we merge with the previous?
                             nl = spans[-1][2]
                             nl.append((n, l))
                             ol += l
                             spans[-1] = (oo, ol, nl)
                         else:
                             oo = o
                             ol = l
                             spans.append((oo, ol, [(n, l)]))
                     # read spans in, divide up chunks
                     chunks = {}
                     for span in spans:
                         # we reopen the file for each span to make http happy for now
                         f = self.opener(self.datafile)
                         f.seek(span[0])
                         data = f.read(span[1])
                         # divide up the span
                         pos = 0
                         for r, l in span[2]:
                             chunks[r] = decompress(data[pos: pos + l])
                             pos += l
                     # helper to reconstruct intermediate versions
                     def construct(text, base, rev):
                         bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
                         return mdiff.patches(text, bins)
                     # build deltas
                     deltas = []
                     for d in xrange(0, len(revs) - 1):
                         a, b = revs[d], revs[d + 1]
                         n = self.node(b)
                         if infocollect is not None:
                             infocollect(n)
                         # do we need to construct a new delta?
                         if a + 1 != b or self.base(b) == b:
                             if a >= 0:
                                 base = self.base(a)
                                 ta = chunks[self.base(a)]
                                 ta = construct(ta, base, a)
                             else:
                                 ta = ""
                             base = self.base(b)
                             if a > base:
                                 base = a
                                 tb = ta
                             else:
                                 tb = chunks[self.base(b)]
                             tb = construct(tb, base, b)
                             d = self.diff(ta, tb)
                         else:
                             d = chunks[b]
                         p = self.parents(n)
                         meta = n + p[0] + p[1] + lookup(n)
                         l = struct.pack(">l", len(meta) + len(d) + 4)
                         yield l
                         yield meta
                         yield d
                     yield struct.pack(">l", 0)
                 def addgroup(self, revs, linkmapper, transaction, unique=0):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     """
                     #track the base of the current delta log
                     r = self.count()
                     t = r - 1
                     node = nullid
                     base = prev = -1
                     start = end = measure = 0
                     if r:
                         start = self.start(self.base(t))
                         end = self.end(t)
                         measure = self.length(self.base(t))
                         base = self.base(t)
                         prev = self.tip()
                     transaction.add(self.datafile, end)
                     transaction.add(self.indexfile, r * struct.calcsize(indexformat))
                     dfh = self.opener(self.datafile, "a")
                     ifh = self.opener(self.indexfile, "a")
                     # loop through our set of deltas
                     chain = None
                     for chunk in revs:
                         node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                         link = linkmapper(cs)
                         if node in self.nodemap:
                             # this can happen if two branches make the same change
                             # if unique:
                             #    raise RevlogError(_("already have %s") % hex(node[:4]))
                             chain = node
                             continue
                         delta = chunk[80:]
                         if not chain:
                             # retrieve the parent revision of the delta chain
                             chain = p1
                             if not chain in self.nodemap:
                                 raise RevlogError(_("unknown base %s") % short(chain[:4]))
                         # full versions are inserted when the needed deltas become
                         # comparable to the uncompressed text or when the previous
                         # version is not the one we have a delta against. We use
                         # the size of the previous full rev as a proxy for the
                         # current size.
                         if chain == prev:
                             cdelta = compress(delta)
                         if chain != prev or (end - start + len(cdelta)) > measure * 2:
                             # flush our writes here so we can read it in revision
                             dfh.flush()
                             ifh.flush()
                             text = self.revision(chain)
                             text = self.patches(text, [delta])
                             chk = self.addrevision(text, transaction, link, p1, p2)
                             if chk != node:
                                 raise RevlogError(_("consistency error adding group"))
                             measure = len(text)
                         else:
                             e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                             self.index.append(e)
                             self.nodemap[node] = r
                             dfh.write(cdelta)
                             ifh.write(struct.pack(indexformat, *e))
                         t, r, chain, prev = r, r + 1, node, node
                         start = self.start(self.base(t))
                         end = self.end(t)
                     dfh.close()
                     ifh.close()
                     return node
                 def checksize(self):
                     expected = 0
                     if self.count():
                         expected = self.end(self.count() - 1)
+                    try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         return expected - actual
+                    except IOError, inst:
+                        if inst.errno == errno.ENOENT:
+                            return 0
+                        raise