revlog: introduce _chunkbase to allow filelog to override...
Sune Foldager
r14075:bc101902 default
@@ -1,1272 +1,1275 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGSHALLOW = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW

# revlog index flags
REVIDX_PARENTDELTA = 1
REVIDX_PUNCHED_FLAG = 2
REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

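To make the node-hashing scheme concrete, here is a minimal standalone sketch. It uses hashlib directly instead of util.sha1, and the sample text is invented; sorting the parents first is equivalent to the nullhash fast path above, because the all-zero nullid always sorts first.

import hashlib

NULLID = "\0" * 20  # the null node, as in mercurial.node

def node_hash(text, p1, p2):
    # sort the parents so the node is independent of parent order;
    # when p2 is nullid it sorts first, matching the fast path above
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

n1 = node_hash("hello\n", NULLID, NULLID)  # a root revision
n2 = node_hash("hello\n", n1, NULLID)      # same text, different history
assert n1 != n2                            # history is part of the identity
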
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

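The two helpers round-trip as a pair; a short sketch (payloads invented) showing the two common storage forms: zlib output recognized by its own 'x' header, and literal text behind a 'u' marker (a leading '\0' marks data stored as-is).

header, data = compress("a" * 4096)        # compressible: zlib, starts with 'x'
assert header == "" and data[0] == 'x'
assert decompress(header + data) == "a" * 4096

header, data = compress("tiny")            # too short to bother compressing
assert (header, data) == ('u', "tiny")     # stored literally behind 'u'
assert decompress(header + data) == "tiny"
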
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, None, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

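Packing one RevlogNG record makes the layout comment above concrete: the 6-byte offset and 2-byte flags share a single 64-bit field. A sketch using the offset_type/getoffset/gettype helpers defined earlier; all field values are invented.

entry = (offset_type(4096, 0),   # offset 4096, no flags, packed together
         120,                    # compressed length
         300,                    # uncompressed length
         5,                      # base rev of the delta chain
         7,                      # link rev
         5, -1,                  # parent 1 and parent 2 revs
         "\xaa" * 20)            # nodeid
rec = _pack(indexformatng, *entry)
assert len(rec) == 64            # Q + 6 ints + 20s + 12 pad bytes
back = _unpack(indexformatng, rec)
assert getoffset(back[0]) == 4096 and gettype(back[0]) == 0
assert back[7] == "\xaa" * 20
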
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, shallowroot=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.index = []
        self._shallowroot = shallowroot
        self._parentdelta = 0
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA
            if v & REVLOGNG and 'parentdelta' in opener.options:
                self._parentdelta = 1

        if shallowroot:
            v |= REVLOGSHALLOW

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._shallow = v & REVLOGSHALLOW
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()

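The version word read back by __init__ decomposes into a format number in the low 16 bits and feature flags in the high bits; a small sketch with the module constants:

v = REVLOG_DEFAULT_VERSION                  # REVLOGNG | REVLOGNGINLINEDATA
assert v == 0x10001
assert v & 0xFFFF == REVLOGNG               # format lives in the low 16 bits
assert v & ~0xFFFF == REVLOGNGINLINEDATA    # flags live in the high bits
assert not (v & ~0xFFFF) & ~REVLOGNG_FLAGS  # only known flags are set
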
    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def rev(self, node):
        try:
            return self._nodecache[node]
        except KeyError:
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

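These accessors index into 8-tuples and rely on the magic null entry appended by parseindex, which keeps the list one longer than the revision count. A toy two-revision index (field values invented, built with offset_type from above):

index = [
    (offset_type(0, 0),  20, 20, 0, 0, -1, -1, "\x01" * 20), # rev 0
    (offset_type(20, 0), 15, 25, 0, 1,  0, -1, "\x02" * 20), # rev 1
    (0, 0, 0, -1, -1, -1, -1, nullid),                       # magic null entry
]
assert len(index) - 1 == 2                      # what __len__ computes
assert index[len(index) - 2][7] == "\x02" * 20  # what tip() resolves
assert index[-1][7] == nullid                   # index[nullrev] is the null entry
assert index[1][5:7] == (0, -1)                 # what parentrevs(1) returns
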
    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in xrange(first + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

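Both generators walk the graph through parentrevs alone, so the traversal is easy to reproduce on a toy DAG (the parent table is invented):

nullrev = -1
parenttable = {0: (nullrev, nullrev),
               1: (0, nullrev),
               2: (0, nullrev),
               3: (1, 2)}               # rev 3 merges 1 and 2

def toy_ancestors(*revs):
    # breadth-first, mirroring the ancestors() loop above
    visit = list(revs)
    seen = set([nullrev])
    while visit:
        for parent in parenttable[visit.pop(0)]:
            if parent not in seen:
                visit.append(parent)
                seen.add(parent)
                yield parent

assert list(toy_ancestors(3)) == [1, 2, 0]  # parents first, then grandparents
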
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common.

        More specifically, the second element is a list of nodes N such that
        every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        _common, missing = self.findcommonmissing(common, heads)
        return missing

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descedents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants(start):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

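lookup() thus tries, in order: integer revs, binary nodes, decimal rev strings, full hex nodeids, and finally hex prefixes. The prefix step reduces to this kind of scan; a simplified sketch on hex strings instead of binary nodeids, with invented node values:

nodes = ["1234567890abcdef1234567890abcdef12345678",
         "12ff567890abcdef1234567890abcdef12345678"]

def toy_partialmatch(id):
    matches = [n for n in nodes if n.startswith(id)]
    if len(matches) > 1:
        raise ValueError("ambiguous identifier: %s" % id)
    return matches[0] if matches else None

assert toy_partialmatch("1234") == nodes[0]
assert toy_partialmatch("ab") is None
try:
    toy_partialmatch("12")   # both nodes share this prefix
except ValueError:
    pass
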
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

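The chunk cache is a single (offset, data) window that only grows while reads stay contiguous. A minimal standalone mock of that policy, with a string standing in for the data file:

_chunksize = 1048576

class chunkcache(object):
    # stand-in for the (offset, data) window kept in _chunkcache
    def __init__(self, data):
        self._data = data          # pretend this is the .d file
        self._cache = (0, '')

    def _addchunk(self, offset, data):
        o, d = self._cache
        # extend the window only if the new read is contiguous
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._cache = o, d + data
        else:
            self._cache = offset, data

    def get(self, offset, length):
        o, d = self._cache
        if 0 <= offset - o and offset - o + length <= len(d):
            return d[offset - o:offset - o + length]   # cache hit
        chunk = self._data[offset:offset + length]     # "disk" read
        self._addchunk(offset, chunk)
        return chunk

c = chunkcache("abcdefghij")
assert c.get(0, 4) == "abcd"   # miss, loads into the window
assert c.get(4, 3) == "efg"    # contiguous miss, window grows to "abcdefg"
assert c.get(2, 5) == "cdefg"  # now served from the cache
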
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

+    def _chunkbase(self, rev):
+        return self._chunk(rev)
+
    def _chunkclear(self):
        self._chunkcache = (0, '')

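This no-op indirection is the point of the commit: revision() below fetches the delta-chain base through _chunkbase, so a subclass can substitute its own base text. A hypothetical sketch of the kind of override a filelog-style subclass could install; the actual filelog change is not shown on this page:

class shallowfilelog(revlog):
    # hypothetical subclass for illustration: treat a punched-out base
    # revision (shallow clone) as empty instead of decompressing it
    def _chunkbase(self, rev):
        if self.flags(rev) & REVIDX_PUNCHED_FLAG:
            return ''
        return self._chunk(rev)
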
    def deltaparent(self, rev):
        """return previous revision or parentrev according to flags"""
        if self.flags(rev) & REVIDX_PARENTDELTA:
            return self.parentrevs(rev)[0]
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        chain = []
        index = self.index # for performance
        iterrev = rev
        e = index[iterrev]
        while iterrev != base and iterrev != cachedrev:
            chain.append(iterrev)
            if e[0] & REVIDX_PARENTDELTA:
                iterrev = e[5]
            else:
                iterrev -= 1
            e = index[iterrev]
        chain.reverse()
        base = iterrev

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        self._chunkraw(base, rev)
        if text is None:
-            text = self._chunk(base)
+            text = self._chunkbase(base)

        bins = [self._chunk(r) for r in chain]
        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

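The chain walk and replay can be seen in miniature below; the delta format is reduced to plain callables, purely for illustration:

# toy delta chain: rev 0 stores a full text, later revs store a function
# of the predecessor's text; replaying mirrors the loop in revision()
def apply_patch(text, delta):
    return delta(text)           # stand-in for mdiff.patches

fulltext = {0: "line1\n"}
deltas = {1: lambda t: t + "line2\n",
          2: lambda t: t + "line3\n"}
basemap = {0: 0, 1: 0, 2: 0}     # all three revs share base rev 0

def toy_revision(rev):
    chain = []
    iterrev = rev
    while iterrev != basemap[rev]:
        chain.append(iterrev)
        iterrev -= 1             # plain previous-rev deltas, no parentdelta
    chain.reverse()
    text = fulltext[iterrev]     # the role _chunkbase plays above
    for r in chain:
        text = apply_patch(text, deltas[r])
    return text

assert toy_revision(2) == "line1\nline2\nline3\n"
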
    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        if (node != hash(text, p1, p2) and
            not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))
        return text

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if (node in self.nodemap and
            (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):

        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read them in revision()
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = compress(delta)
            l = len(data[1]) + len(data[0])
            base = self.base(rev)
            dist = l + offset - self.start(base)
            return dist, l, data, base

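        # builddelta() returns (dist, l, data, base); comparing two such
        # tuples (as done below when trying a parent delta) therefore
        # prefers the candidate with the smaller chain distance first.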
        curr = len(self)
        prev = curr - 1
        base = curr
        offset = self.end(prev)
        flags = 0
        d = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            d = builddelta(prev)
            if self._parentdelta and prev != p1r:
                d2 = builddelta(p1r)
                if d2 < d:
                    d = d2
                    flags = REVIDX_PARENTDELTA
            dist, l, data, base = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # or the base revision is punched
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if (d is None or dist > textlen * 2 or
            (self.flags(base) & REVIDX_PUNCHED_FLAG)):
            text = buildtext()
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr
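        # Illustrative numbers: with a 1000-byte text, the check above
        # falls back to storing a full version as soon as the on-disk
        # chain span (dist) exceeds 2000 bytes.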

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

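    # The generator below yields a flat chunk stream: one revchunk per
    # requested revision, terminated by bundler.close().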
    def group(self, nodelist, bundler):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset nodes, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0], the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. If the first parent is nullrev, the
        changegroup starts with a full revision.
        """

        revs = sorted([self.rev(n) for n in nodelist])

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield bundler.close()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for r in xrange(len(revs) - 1):
            a, b = revs[r], revs[r + 1]
            nb = self.node(b)
            p1, p2 = self.parents(nb)
            prefix = ''

            if a == nullrev:
                d = self.revision(nb)
                prefix = mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            for c in bundler.revchunk(self, nb, p1, p2, prefix, d):
                yield c

        yield bundler.close()

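    # addgroup() is the receiving counterpart of group(): it replays a
    # changegroup stream (typically from a pull or unbundle) into this
    # revlog.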
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should already be
        in our log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.parsechunk()
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                delta = chunkdata['data']

                link = linkmapper(cs)
                if (node in self.nodemap and
                    (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        if self._shallow:
                            # add null entries for missing parents
                            # XXX FIXME
                            #if base == nullrev:
                            #    base = len(self)
                            #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
                            #     0, 0, base, nullrev, nullrev, nullrev, p)
                            #self.index.insert(-1, e)
                            #self.nodemap[p] = r
                            #entry = self._io.packentry(e, self.node,
                            #                           self.version, r)
                            #ifh.write(entry)
                            #t, r = r, r + 1
                            raise LookupError(p, self.indexfile,
                                              _('unknown parent'))
                        else:
                            raise LookupError(p, self.indexfile,
                                              _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                chainrev = self.rev(chain)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (chainrev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

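    # Example: strip(5, tr) truncates this revlog at the first revision
    # whose linkrev is >= 5; everything before that point is kept.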
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

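    # checksize() returns (dd, di): stray bytes found past the expected
    # end of the data file and the index file respectively; both are 0
    # for a consistent revlog.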
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res