Reduce string duplication in compression code...

mason@suse.com
r1533:3d11f81c default
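This change reworks compress() to return a (header, data) pair instead of one
concatenated string, so a one-byte tag like 'u' is no longer glued onto the
full text (which duplicated the whole buffer just to mark it uncompressed).
The callers in addrevision() and addgroup() are updated to measure and write
the two pieces separately. A minimal sketch of the old versus new contract
(illustrative only; `out` is a stand-in writable file object):

    # old contract: one string, tag prepended with a full copy
    #     bin = compress(text)        # possibly 'u' + text
    #     out.write(bin)
    # new contract: tag and payload stay separate
    h, d = compress(text)             # e.g. ('u', text) or ('', <zlib data>)
    l = len(h) + len(d)               # the size recorded in the index
    out.write(h)                      # at most one byte
    out.write(d)                      # large payload, no copy made
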
@@ -1,845 +1,851 @@
 """
 revlog.py - storage back-end for mercurial

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches

 Copyright 2005 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import gettext as _
 from demandload import demandload
 demandload(globals(), "binascii errno heapq mdiff sha struct zlib")

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

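Because hash() sorts the parents before feeding them to SHA-1, the nodeid is
symmetric in p1 and p2. A small illustration (the 20-byte parent ids here are
made up):

    p1 = '\x01' * 20
    p2 = '\x02' * 20
    assert hash("file contents", p1, p2) == hash("file contents", p2, p1)
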
 def compress(text):
     """ generate a possibly-compressed representation of text """
-    if not text: return text
+    if not text: return ("", text)
     if len(text) < 44:
-        if text[0] == '\0': return text
+        if text[0] == '\0': return ("", text)
-        return 'u' + text
+        return ('u', text)
     bin = zlib.compress(text)
     if len(bin) > len(text):
-        if text[0] == '\0': return text
+        if text[0] == '\0': return ("", text)
-        return 'u' + text
+        return ('u', text)
-    return bin
+    return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin: return bin
     t = bin[0]
     if t == '\0': return bin
     if t == 'x': return zlib.decompress(bin)
     if t == 'u': return bin[1:]
     raise RevlogError(_("unknown compression type %s") % t)

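decompress() still takes the single on-disk buffer whose first byte is the
type tag ('\0', 'x', or 'u'), so a round trip under the new contract simply
concatenates the pair. A quick sketch:

    h, d = compress("some revision text" * 10)
    assert decompress(h + d) == "some revision text" * 10
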
 indexformat = ">4l20s20s20s"

 class lazyparser:
     """
     this class avoids the need to parse the entirety of large indices

     By default we parse and load 1000 entries at a time.

     If no position is specified, we load the whole index, and replace
     the lazy objects in revlog with the underlying objects for
     efficiency in cases where we look at most of the nodes.
     """
     def __init__(self, data, revlog):
         self.data = data
         self.s = struct.calcsize(indexformat)
         self.l = len(data)/self.s
         self.index = [None] * self.l
         self.map = {nullid: -1}
         self.all = 0
         self.revlog = revlog

     def load(self, pos=None):
         if self.all: return
         if pos is not None:
             block = pos / 1000
             i = block * 1000
             end = min(self.l, i + 1000)
         else:
             self.all = 1
             i = 0
             end = self.l
             self.revlog.index = self.index
             self.revlog.nodemap = self.map

         while i < end:
             d = self.data[i * self.s: (i + 1) * self.s]
             e = struct.unpack(indexformat, d)
             self.index[i] = e
             self.map[e[6]] = i
             i += 1

 class lazyindex:
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.load(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return self.p.index[pos] or self.load(pos)
     def append(self, e):
         self.p.index.append(e)

 class lazymap:
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         if self.p.all: return
         n = self.p.data.find(key)
         if n < 0:
             raise KeyError(key)
         pos = n / self.p.s
         self.p.load(pos)
     def __contains__(self, key):
         self.p.load()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             try:
                 yield self.p.index[i][6]
             except:
                 self.p.load(i)
                 yield self.p.index[i][6]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val

 class RevlogError(Exception): pass

 class revlog:
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile, datafile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = datafile
         self.opener = opener
         self.cache = None

         try:
             i = self.opener(self.indexfile).read()
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
             i = ""

         if len(i) > 10000:
             # big index, let's parse it on demand
             parser = lazyparser(i, self)
             self.index = lazyindex(parser)
             self.nodemap = lazymap(parser)
         else:
             s = struct.calcsize(indexformat)
             l = len(i) / s
             self.index = [None] * l
             m = [None] * l

             n = 0
             for f in xrange(0, len(i), s):
                 # offset, size, base, linkrev, p1, p2, nodeid
                 e = struct.unpack(indexformat, i[f:f + s])
                 m[n] = (e[6], n)
                 self.index[n] = e
                 n += 1

             self.nodemap = dict(m)
             self.nodemap[nullid] = -1

     def tip(self): return self.node(len(self.index) - 1)
     def count(self): return len(self.index)
     def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def linkrev(self, node): return self.index[self.rev(node)][3]
     def parents(self, node):
         if node == nullid: return (nullid, nullid)
         return self.index[self.rev(node)][4:6]

     def start(self, rev): return self.index[rev][0]
     def length(self, rev): return self.index[rev][1]
     def end(self, rev): return self.start(rev) + self.length(rev)
     def base(self, rev): return self.index[rev][2]

     def reachable(self, rev, stop=None):
         reachable = {}
         visit = [rev]
         reachable[rev] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list of bases and heads after all the unreachable ones have
         been pruned. Element 0 contains a topologically sorted list of all
         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
         roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
         heads are considered to be their own ancestors).

         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = -1
         if (lowestrev == -1) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in xrange(0, self.count())],
                     [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = self.count() - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = {}
             # Start at the top and keep marking parents until we're done.
             nodestotag = heads[:]
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors[n] = 1 # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.extend([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.

             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > -1:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.

                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots? Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = -1
                 roots = [nullid]
         # Transform our roots list into a 'set' (i.e. a dictionary where the
         # values don't matter).
         descendents = dict.fromkeys(roots, 1)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descendents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == -1: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked as descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.pop(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents. (We seeded the descendents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents[n] = 1
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = roots.keys()
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)

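As a concrete, hypothetical example of the contract: in a linear revlog
n0 <- n1 <- n2 <- n3, the call

    # nodesbetween([n1], [n2]) -> ([n1, n2], [n1], [n2])

returns every node that is both a descendent of a root and an ancestor of a
head, in topological order, along with the pruned root and head lists.
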
     def heads(self, stop=None):
         """return the list of all nodes that have no children"""
         p = {}
         h = []
         stoprev = 0
         if stop and stop in self.nodemap:
             stoprev = self.rev(stop)

         for r in range(self.count() - 1, -1, -1):
             n = self.node(r)
             if n not in p:
                 h.append(n)
             if n == stop:
                 break
             if r < stoprev:
                 break
             for pn in self.parents(n):
                 p[pn] = 1
         return h

     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, self.count()):
             n = self.node(r)
             for pn in self.parents(n):
                 if pn == node:
                     c.append(n)
                     continue
                 elif pn == nullid:
                     continue
         return c

     def lookup(self, id):
         """locate a node based on revision number or subset of hex nodeid"""
         try:
             rev = int(id)
             if str(rev) != id: raise ValueError
             if rev < 0: rev = self.count() + rev
             if rev < 0 or rev >= self.count(): raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             c = []
             for n in self.nodemap:
                 if hex(n).startswith(id):
                     c.append(n)
             if len(c) > 1: raise RevlogError(_("Ambiguous identifier"))
             if len(c) < 1: raise RevlogError(_("No match found"))
             return c[0]

         return None

     def diff(self, a, b):
         """return a delta between two revisions"""
         return mdiff.textdiff(a, b)

     def patches(self, t, pl):
         """apply a list of patches to a string"""
         return mdiff.patches(t, pl)

     def delta(self, node):
         """return or calculate a delta between a node and its predecessor"""
         r = self.rev(node)
         b = self.base(r)
         if r == b:
             return self.diff(self.revision(self.node(r - 1)),
                              self.revision(node))
         else:
             f = self.opener(self.datafile)
             f.seek(self.start(r))
             data = f.read(self.length(r))
             return decompress(data)

     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""
         if self.cache and self.cache[0] == node: return self.cache[2]

         # look up what we need to read
         text = None
         rev = self.rev(node)
         start, length, base, link, p1, p2, node = self.index[rev]
         end = start + length
         if base != rev: start = self.start(base)

         # do we have useful data cached?
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             start = self.start(base + 1)
             text = self.cache[2]
             last = 0

         f = self.opener(self.datafile)
         f.seek(start)
         data = f.read(end - start)

         if text is None:
             last = self.length(base)
             text = decompress(data[:last])

         bins = []
         for r in xrange(base + 1, rev + 1):
             s = self.length(r)
             bins.append(decompress(data[last:last + s]))
             last = last + s

         text = mdiff.patches(text, bins)

         if node != hash(text, p1, p2):
             raise RevlogError(_("integrity check failed on %s:%d")
                               % (self.datafile, rev))

         self.cache = (node, rev, text)
         return text

     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         """add a revision to the log

         text - the revision data to add
         transaction - the transaction object used for rollback
         link - the linkrev data to add
         p1, p2 - the parent nodeids of the revision
         d - an optional precomputed delta
         """
         if text is None: text = ""
         if p1 is None: p1 = self.tip()
         if p2 is None: p2 = nullid

         node = hash(text, p1, p2)

         if node in self.nodemap:
             return node

         n = self.count()
         t = n - 1

         if n:
             base = self.base(t)
             start = self.start(base)
             end = self.end(t)
             if not d:
                 prev = self.revision(self.tip())
-                d = self.diff(prev, text)
+                d = self.diff(prev, str(text))
             data = compress(d)
-            dist = end - start + len(data)
+            l = len(data[1]) + len(data[0])
+            dist = end - start + l

         # full versions are inserted when the needed deltas
         # become comparable to the uncompressed text
         if not n or dist > len(text) * 2:
             data = compress(text)
+            l = len(data[1]) + len(data[0])
             base = n
         else:
             base = self.base(t)

         offset = 0
         if t >= 0:
             offset = self.end(t)

-        e = (offset, len(data), base, link, p1, p2, node)
+        e = (offset, l, base, link, p1, p2, node)

         self.index.append(e)
         self.nodemap[node] = n
         entry = struct.pack(indexformat, *e)

         transaction.add(self.datafile, e[0])
-        self.opener(self.datafile, "a").write(data)
+        f = self.opener(self.datafile, "a")
+        if data[0]:
+            f.write(data[0])
+        f.write(data[1])
         transaction.add(self.indexfile, n * len(entry))
         self.opener(self.indexfile, "a").write(entry)

         self.cache = (node, n, text)
         return node

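The pair returned by compress() threads through addrevision() as the hunks
above show: the index entry records the combined length, and the two pieces
are written back to back, so the one-byte tag never forces a copy of a large
delta or full text. In outline (names as in the patch):

    data = compress(d)                  # -> (header, payload)
    l = len(data[0]) + len(data[1])     # combined size stored in the index
    f = self.opener(self.datafile, "a")
    if data[0]:                         # header is "" or 'u'
        f.write(data[0])
    f.write(data[1])
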
     def ancestor(self, a, b):
         """calculate the least common ancestor of nodes a and b"""
         # calculate the distance of every node from root
         dist = {nullid: 0}
         for i in xrange(self.count()):
             n = self.node(i)
             p1, p2 = self.parents(n)
             dist[n] = max(dist[p1], dist[p2]) + 1

         # traverse ancestors in order of decreasing distance from root
         def ancestors(node):
             # we store negative distances because heap returns smallest member
             h = [(-dist[node], node)]
             seen = {}
             earliest = self.count()
             while h:
                 d, n = heapq.heappop(h)
                 if n not in seen:
                     seen[n] = 1
                     r = self.rev(n)
                     yield (-d, n)
                     for p in self.parents(n):
                         heapq.heappush(h, (-dist[p], p))

         def generations(node):
             sg, s = None, {}
             for g,n in ancestors(node):
                 if g != sg:
                     if sg:
                         yield sg, s
                     sg, s = g, {n:1}
                 else:
                     s[n] = 1
             yield sg, s

         x = generations(a)
         y = generations(b)
         gx = x.next()
         gy = y.next()

         # increment each ancestor list until it is closer to root than
         # the other, or they match
         while 1:
             #print "ancestor gen %s %s" % (gx[0], gy[0])
             if gx[0] == gy[0]:
                 # find the intersection
                 i = [ n for n in gx[1] if n in gy[1] ]
                 if i:
                     return i[0]
                 else:
                     #print "next"
                     gy = y.next()
                     gx = x.next()
             elif gx[0] < gy[0]:
                 #print "next y"
                 gy = y.next()
             else:
                 #print "next x"
                 gx = x.next()

     def group(self, nodelist, lookup, infocollect = None):
         """calculate a delta group

         Given a list of changeset revs, return a set of deltas and
         metadata corresponding to nodes. The first delta is
         parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
         have this parent as it has all history before these
         changesets. The parent is parent[0].
         """
         revs = [self.rev(n) for n in nodelist]
         needed = dict.fromkeys(revs, 1)

         # if we don't have any revisions touched by these changesets, bail
         if not revs:
             yield struct.pack(">l", 0)
             return

         # add the parent of the first rev
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))

         # for each delta that isn't contiguous in the log, we need to
         # reconstruct the base, reconstruct the result, and then
         # calculate the delta. We also need to do this where we've
         # stored a full version and not a delta
         for i in xrange(0, len(revs) - 1):
             a, b = revs[i], revs[i + 1]
             if a + 1 != b or self.base(b) == b:
                 for j in xrange(self.base(a), a + 1):
                     needed[j] = 1
                 for j in xrange(self.base(b), b + 1):
                     needed[j] = 1

         # calculate spans to retrieve from datafile
         needed = needed.keys()
         needed.sort()
         spans = []
         oo = -1
         ol = 0
         for n in needed:
             if n < 0: continue
             o = self.start(n)
             l = self.length(n)
             if oo + ol == o: # can we merge with the previous?
                 nl = spans[-1][2]
                 nl.append((n, l))
                 ol += l
                 spans[-1] = (oo, ol, nl)
             else:
                 oo = o
                 ol = l
                 spans.append((oo, ol, [(n, l)]))

         # read spans in, divide up chunks
         chunks = {}
         for span in spans:
             # we reopen the file for each span to make http happy for now
             f = self.opener(self.datafile)
             f.seek(span[0])
             data = f.read(span[1])

             # divide up the span
             pos = 0
             for r, l in span[2]:
                 chunks[r] = decompress(data[pos: pos + l])
                 pos += l

         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
             bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)

         # build deltas
         deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)

             if infocollect is not None:
                 infocollect(n)

             # do we need to construct a new delta?
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
                     ta = chunks[self.base(a)]
                     ta = construct(ta, base, a)
                 else:
                     ta = ""

                 base = self.base(b)
                 if a > base:
                     base = a
                     tb = ta
                 else:
                     tb = chunks[self.base(b)]
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
                 d = chunks[b]

             p = self.parents(n)
             meta = n + p[0] + p[1] + lookup(n)
             l = struct.pack(">l", len(meta) + len(d) + 4)
             yield l
             yield meta
             yield d

         yield struct.pack(">l", 0)

     def addgroup(self, revs, linkmapper, transaction, unique=0):
         """
         add a delta group

         given a set of deltas, add them to the revision log. the
         first delta is against its parent, which should be in our
         log, the rest are against the previous delta.
         """

         #track the base of the current delta log
         r = self.count()
         t = r - 1
         node = nullid

         base = prev = -1
         start = end = measure = 0
         if r:
             start = self.start(self.base(t))
             end = self.end(t)
             measure = self.length(self.base(t))
             base = self.base(t)
             prev = self.tip()

         transaction.add(self.datafile, end)
         transaction.add(self.indexfile, r * struct.calcsize(indexformat))
         dfh = self.opener(self.datafile, "a")
         ifh = self.opener(self.indexfile, "a")

         # loop through our set of deltas
         chain = None
         for chunk in revs:
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             link = linkmapper(cs)
             if node in self.nodemap:
                 # this can happen if two branches make the same change
                 # if unique:
                 #    raise RevlogError(_("already have %s") % hex(node[:4]))
                 chain = node
                 continue
             delta = chunk[80:]

             for p in (p1, p2):
                 if not p in self.nodemap:
                     raise RevlogError(_("unknown parent %s") % short(p1))

             if not chain:
                 # retrieve the parent revision of the delta chain
                 chain = p1
                 if not chain in self.nodemap:
                     raise RevlogError(_("unknown base %s") % short(chain[:4]))

             # full versions are inserted when the needed deltas become
             # comparable to the uncompressed text or when the previous
             # version is not the one we have a delta against. We use
             # the size of the previous full rev as a proxy for the
             # current size.

             if chain == prev:
-                cdelta = compress(delta)
+                tempd = compress(delta)
+                cdelta = tempd[0] + tempd[1]

             if chain != prev or (end - start + len(cdelta)) > measure * 2:
                 # flush our writes here so we can read it in revision
                 dfh.flush()
                 ifh.flush()
                 text = self.revision(chain)
                 text = self.patches(text, [delta])
                 chk = self.addrevision(text, transaction, link, p1, p2)
                 if chk != node:
                     raise RevlogError(_("consistency error adding group"))
                 measure = len(text)
             else:
                 e = (end, len(cdelta), self.base(t), link, p1, p2, node)
                 self.index.append(e)
                 self.nodemap[node] = r
                 dfh.write(cdelta)
                 ifh.write(struct.pack(indexformat, *e))

             t, r, chain, prev = r, r + 1, node, node
             start = self.start(self.base(t))
             end = self.end(t)

         dfh.close()
         ifh.close()
         return node

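addgroup() still has to write each delta as one contiguous chunk whose length
matches its index entry, so the pair is rejoined there; the cost is one small
concatenation per incoming delta rather than a tag-copy on every compress()
call. In outline (names as in the patch):

    tempd = compress(delta)             # -> (header, payload)
    cdelta = tempd[0] + tempd[1]        # on-disk chunk; len(cdelta) == e[1]
    dfh.write(cdelta)
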
     def checksize(self):
         expected = 0
         if self.count():
             expected = self.end(self.count() - 1)
         try:
             f = self.opener(self.datafile)
             f.seek(0, 2)
             actual = f.tell()
             return expected - actual
         except IOError, inst:
             if inst.errno == errno.ENOENT:
                 return 0
             raise