revlog: fix for generaldelta distance calculation...
Friedrich Kastner-Masilko
r17134:e7167007 default
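
The changeset adds a `chainlength` helper to the `revlog` class (the lines marked `+` in the hunk below). Under generaldelta, a revision's delta base (`index[rev][3]`) may be any earlier revision rather than the one stored immediately before it, so a delta chain is not necessarily contiguous in the data file; the helper walks the chain through the base-rev field and sums the compressed chunk lengths (`index[rev][1]`) to measure the actual amount of data needed to rebuild a revision. Below is a minimal standalone sketch of the same walk over a toy index; the entry layout mirrors the real index (compressed length at [1], base rev at [3]), but the sample data, values, and names are illustrative only, not part of the changeset:

    # Toy index entries: (offset_flags, complen, rawlen, baserev).
    # Only [1] (compressed length) and [3] (delta base) matter here;
    # base == rev marks a full snapshot, i.e. the start of a chain.
    index = [
        (0, 100, 100, 0),  # rev 0: full snapshot, 100 compressed bytes
        (0,  10, 110, 0),  # rev 1: delta against rev 0
        (0,  12, 120, 1),  # rev 2: delta against rev 1
        (0,  90,  90, 3),  # rev 3: fresh full snapshot
    ]

    def chainlength(rev):
        # Same walk as the helper added in this changeset.
        base = index[rev][3]
        length = index[rev][1]
        while base != rev:
            rev = base
            base = index[rev][3]
            length = length + index[rev][1]
        return length

    print chainlength(2)  # 122 = 100 + 10 + 12: the whole chain is read
    print chainlength(3)  # 90: rev 3 starts a new chain

The commit title suggests this sum replaces a distance estimate derived from file offsets, which measures a contiguous byte span and so overstates the cost of a non-contiguous generaldelta chain; the call site, presumably in `_addrevision`'s delta-choice logic, falls outside the portion of the hunk shown here.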
@@ -1,1312 +1,1322 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, dagutil
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_KNOWN_FLAGS = 0

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error, e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._basecache = (0, 0)
        self._chunkcache = (0, '')
        self.index = []
        self._pcache = {}
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0

        i = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            i = f.read()
            f.close()
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
                self._initempty = False
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(i, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
+    def chainlength(self, rev):
+        index = self.index
+        base = index[rev][3]
+        length = index[rev][1]
+        while base != rev:
+            rev = base
+            base = index[rev][3]
+            length = length + index[rev][1]
+        return length
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = util.deque(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.popleft()):
                if parent < stoprev:
                    continue
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def incancestors(self, revs, stoprev=0):
        """Identical to ancestors() except it also generates the
        revisions, 'revs'"""
        for rev in revs:
            yield rev
        for rev in self.ancestors(revs, stoprev):
            yield rev

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in xrange(first + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = util.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        _common, missing = self.findcommonmissing(common, heads)
        return missing

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [n for n in roots if n in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(n) for n in roots])
            else:
                # No more roots? Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
591 if n in roots:
600 if n in roots:
592 # If n was a root, check if it's a 'real' root.
601 # If n was a root, check if it's a 'real' root.
593 p = tuple(self.parents(n))
602 p = tuple(self.parents(n))
594 # If any of its parents are descendants, it's not a root.
603 # If any of its parents are descendants, it's not a root.
595 if (p[0] in descendants) or (p[1] in descendants):
604 if (p[0] in descendants) or (p[1] in descendants):
596 roots.remove(n)
605 roots.remove(n)
597 else:
606 else:
598 p = tuple(self.parents(n))
607 p = tuple(self.parents(n))
599 # A node is a descendant if either of its parents are
608 # A node is a descendant if either of its parents are
600 # descendants. (We seeded the dependents list with the roots
609 # descendants. (We seeded the dependents list with the roots
601 # up there, remember?)
610 # up there, remember?)
602 if (p[0] in descendants) or (p[1] in descendants):
611 if (p[0] in descendants) or (p[1] in descendants):
603 descendants.add(n)
612 descendants.add(n)
604 isdescendant = True
613 isdescendant = True
605 if isdescendant and ((ancestors is None) or (n in ancestors)):
614 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 # Only include nodes that are both descendants and ancestors.
615 # Only include nodes that are both descendants and ancestors.
607 orderedout.append(n)
616 orderedout.append(n)
608 if (ancestors is not None) and (n in heads):
617 if (ancestors is not None) and (n in heads):
609 # We're trying to figure out which heads are reachable
618 # We're trying to figure out which heads are reachable
610 # from roots.
619 # from roots.
611 # Mark this head as having been reached
620 # Mark this head as having been reached
612 heads[n] = True
621 heads[n] = True
613 elif ancestors is None:
622 elif ancestors is None:
614 # Otherwise, we're trying to discover the heads.
623 # Otherwise, we're trying to discover the heads.
615 # Assume this is a head because if it isn't, the next step
624 # Assume this is a head because if it isn't, the next step
616 # will eventually remove it.
625 # will eventually remove it.
617 heads[n] = True
626 heads[n] = True
618 # But, obviously its parents aren't.
627 # But, obviously its parents aren't.
619 for p in self.parents(n):
628 for p in self.parents(n):
620 heads.pop(p, None)
629 heads.pop(p, None)
621 heads = [n for n, flag in heads.iteritems() if flag]
630 heads = [n for n, flag in heads.iteritems() if flag]
622 roots = list(roots)
631 roots = list(roots)
623 assert orderedout
632 assert orderedout
624 assert roots
633 assert roots
625 assert heads
634 assert heads
626 return (orderedout, roots, heads)
635 return (orderedout, roots, heads)
627
636
628 def headrevs(self):
637 def headrevs(self):
629 try:
638 try:
630 return self.index.headrevs()
639 return self.index.headrevs()
631 except AttributeError:
640 except AttributeError:
632 pass
641 pass
633 count = len(self)
642 count = len(self)
634 if not count:
643 if not count:
635 return [nullrev]
644 return [nullrev]
636 ishead = [1] * (count + 1)
645 ishead = [1] * (count + 1)
637 index = self.index
646 index = self.index
638 for r in xrange(count):
647 for r in xrange(count):
639 e = index[r]
648 e = index[r]
640 ishead[e[5]] = ishead[e[6]] = 0
649 ishead[e[5]] = ishead[e[6]] = 0
641 return [r for r in xrange(count) if ishead[r]]
650 return [r for r in xrange(count) if ishead[r]]
642
651
643 def heads(self, start=None, stop=None):
652 def heads(self, start=None, stop=None):
644 """return the list of all nodes that have no children
653 """return the list of all nodes that have no children
645
654
646 if start is specified, only heads that are descendants of
655 if start is specified, only heads that are descendants of
647 start will be returned
656 start will be returned
648 if stop is specified, it will consider all the revs from stop
657 if stop is specified, it will consider all the revs from stop
649 as if they had no children
658 as if they had no children
650 """
659 """
651 if start is None and stop is None:
660 if start is None and stop is None:
652 if not len(self):
661 if not len(self):
653 return [nullid]
662 return [nullid]
654 return [self.node(r) for r in self.headrevs()]
663 return [self.node(r) for r in self.headrevs()]
655
664
656 if start is None:
665 if start is None:
657 start = nullid
666 start = nullid
658 if stop is None:
667 if stop is None:
659 stop = []
668 stop = []
660 stoprevs = set([self.rev(n) for n in stop])
669 stoprevs = set([self.rev(n) for n in stop])
661 startrev = self.rev(start)
670 startrev = self.rev(start)
662 reachable = set((startrev,))
671 reachable = set((startrev,))
663 heads = set((startrev,))
672 heads = set((startrev,))
664
673
665 parentrevs = self.parentrevs
674 parentrevs = self.parentrevs
666 for r in xrange(startrev + 1, len(self)):
675 for r in xrange(startrev + 1, len(self)):
667 for p in parentrevs(r):
676 for p in parentrevs(r):
668 if p in reachable:
677 if p in reachable:
669 if r not in stoprevs:
678 if r not in stoprevs:
670 reachable.add(r)
679 reachable.add(r)
671 heads.add(r)
680 heads.add(r)
672 if p in heads and p not in stoprevs:
681 if p in heads and p not in stoprevs:
673 heads.remove(p)
682 heads.remove(p)
674
683
675 return [self.node(r) for r in heads]
684 return [self.node(r) for r in heads]
676
685
677 def children(self, node):
686 def children(self, node):
678 """find the children of a given node"""
687 """find the children of a given node"""
679 c = []
688 c = []
680 p = self.rev(node)
689 p = self.rev(node)
681 for r in range(p + 1, len(self)):
690 for r in range(p + 1, len(self)):
682 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
691 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
683 if prevs:
692 if prevs:
684 for pr in prevs:
693 for pr in prevs:
685 if pr == p:
694 if pr == p:
686 c.append(self.node(r))
695 c.append(self.node(r))
687 elif p == nullrev:
696 elif p == nullrev:
688 c.append(self.node(r))
697 c.append(self.node(r))
689 return c
698 return c
690
699
691 def descendant(self, start, end):
700 def descendant(self, start, end):
692 if start == nullrev:
701 if start == nullrev:
693 return True
702 return True
694 for i in self.descendants([start]):
703 for i in self.descendants([start]):
695 if i == end:
704 if i == end:
696 return True
705 return True
697 elif i > end:
706 elif i > end:
698 break
707 break
699 return False
708 return False
700
709
701 def ancestor(self, a, b):
710 def ancestor(self, a, b):
702 """calculate the least common ancestor of nodes a and b"""
711 """calculate the least common ancestor of nodes a and b"""
703
712
704 # fast path, check if it is a descendant
713 # fast path, check if it is a descendant
705 a, b = self.rev(a), self.rev(b)
714 a, b = self.rev(a), self.rev(b)
706 start, end = sorted((a, b))
715 start, end = sorted((a, b))
707 if self.descendant(start, end):
716 if self.descendant(start, end):
708 return self.node(start)
717 return self.node(start)
709
718
710 def parents(rev):
719 def parents(rev):
711 return [p for p in self.parentrevs(rev) if p != nullrev]
720 return [p for p in self.parentrevs(rev) if p != nullrev]
712
721
713 c = ancestor.ancestor(a, b, parents)
722 c = ancestor.ancestor(a, b, parents)
714 if c is None:
723 if c is None:
715 return nullid
724 return nullid
716
725
717 return self.node(c)
726 return self.node(c)
718
727
719 def _match(self, id):
728 def _match(self, id):
720 if isinstance(id, int):
729 if isinstance(id, int):
721 # rev
730 # rev
722 return self.node(id)
731 return self.node(id)
723 if len(id) == 20:
732 if len(id) == 20:
724 # possibly a binary node
733 # possibly a binary node
725 # odds of a binary node being all hex in ASCII are 1 in 10**25
734 # odds of a binary node being all hex in ASCII are 1 in 10**25
726 try:
735 try:
727 node = id
736 node = id
728 self.rev(node) # quick search the index
737 self.rev(node) # quick search the index
729 return node
738 return node
730 except LookupError:
739 except LookupError:
731 pass # may be partial hex id
740 pass # may be partial hex id
732 try:
741 try:
733 # str(rev)
742 # str(rev)
734 rev = int(id)
743 rev = int(id)
735 if str(rev) != id:
744 if str(rev) != id:
736 raise ValueError
745 raise ValueError
737 if rev < 0:
746 if rev < 0:
738 rev = len(self) + rev
747 rev = len(self) + rev
739 if rev < 0 or rev >= len(self):
748 if rev < 0 or rev >= len(self):
740 raise ValueError
749 raise ValueError
741 return self.node(rev)
750 return self.node(rev)
742 except (ValueError, OverflowError):
751 except (ValueError, OverflowError):
743 pass
752 pass
744 if len(id) == 40:
753 if len(id) == 40:
745 try:
754 try:
746 # a full hex nodeid?
755 # a full hex nodeid?
747 node = bin(id)
756 node = bin(id)
748 self.rev(node)
757 self.rev(node)
749 return node
758 return node
750 except (TypeError, LookupError):
759 except (TypeError, LookupError):
751 pass
760 pass
752
761
753 def _partialmatch(self, id):
762 def _partialmatch(self, id):
754 try:
763 try:
755 return self.index.partialmatch(id)
764 return self.index.partialmatch(id)
756 except RevlogError:
765 except RevlogError:
757 # parsers.c radix tree lookup gave multiple matches
766 # parsers.c radix tree lookup gave multiple matches
758 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
767 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
759 except (AttributeError, ValueError):
768 except (AttributeError, ValueError):
760 # we are pure python, or key was too short to search radix tree
769 # we are pure python, or key was too short to search radix tree
761 pass
770 pass
762
771
763 if id in self._pcache:
772 if id in self._pcache:
764 return self._pcache[id]
773 return self._pcache[id]
765
774
766 if len(id) < 40:
775 if len(id) < 40:
767 try:
776 try:
768 # hex(node)[:...]
777 # hex(node)[:...]
769 l = len(id) // 2 # grab an even number of digits
778 l = len(id) // 2 # grab an even number of digits
770 prefix = bin(id[:l * 2])
779 prefix = bin(id[:l * 2])
771 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
780 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
772 nl = [n for n in nl if hex(n).startswith(id)]
781 nl = [n for n in nl if hex(n).startswith(id)]
773 if len(nl) > 0:
782 if len(nl) > 0:
774 if len(nl) == 1:
783 if len(nl) == 1:
775 self._pcache[id] = nl[0]
784 self._pcache[id] = nl[0]
776 return nl[0]
785 return nl[0]
777 raise LookupError(id, self.indexfile,
786 raise LookupError(id, self.indexfile,
778 _('ambiguous identifier'))
787 _('ambiguous identifier'))
779 return None
788 return None
780 except TypeError:
789 except TypeError:
781 pass
790 pass
782
791
783 def lookup(self, id):
792 def lookup(self, id):
784 """locate a node based on:
793 """locate a node based on:
785 - revision number or str(revision number)
794 - revision number or str(revision number)
786 - nodeid or subset of hex nodeid
795 - nodeid or subset of hex nodeid
787 """
796 """
788 n = self._match(id)
797 n = self._match(id)
789 if n is not None:
798 if n is not None:
790 return n
799 return n
791 n = self._partialmatch(id)
800 n = self._partialmatch(id)
792 if n:
801 if n:
793 return n
802 return n
794
803
795 raise LookupError(id, self.indexfile, _('no match found'))
804 raise LookupError(id, self.indexfile, _('no match found'))
796
805
797 def cmp(self, node, text):
806 def cmp(self, node, text):
798 """compare text with a given file revision
807 """compare text with a given file revision
799
808
800 returns True if text is different than what is stored.
809 returns True if text is different than what is stored.
801 """
810 """
802 p1, p2 = self.parents(node)
811 p1, p2 = self.parents(node)
803 return hash(text, p1, p2) != node
812 return hash(text, p1, p2) != node
804
813
805 def _addchunk(self, offset, data):
814 def _addchunk(self, offset, data):
806 o, d = self._chunkcache
815 o, d = self._chunkcache
807 # try to add to existing cache
816 # try to add to existing cache
808 if o + len(d) == offset and len(d) + len(data) < _chunksize:
817 if o + len(d) == offset and len(d) + len(data) < _chunksize:
809 self._chunkcache = o, d + data
818 self._chunkcache = o, d + data
810 else:
819 else:
811 self._chunkcache = offset, data
820 self._chunkcache = offset, data
812
821
813 def _loadchunk(self, offset, length):
822 def _loadchunk(self, offset, length):
814 if self._inline:
823 if self._inline:
815 df = self.opener(self.indexfile)
824 df = self.opener(self.indexfile)
816 else:
825 else:
817 df = self.opener(self.datafile)
826 df = self.opener(self.datafile)
818
827
819 readahead = max(65536, length)
828 readahead = max(65536, length)
820 df.seek(offset)
829 df.seek(offset)
821 d = df.read(readahead)
830 d = df.read(readahead)
822 df.close()
831 df.close()
823 self._addchunk(offset, d)
832 self._addchunk(offset, d)
824 if readahead > length:
833 if readahead > length:
825 return util.buffer(d, 0, length)
834 return util.buffer(d, 0, length)
826 return d
835 return d
827
836
828 def _getchunk(self, offset, length):
837 def _getchunk(self, offset, length):
829 o, d = self._chunkcache
838 o, d = self._chunkcache
830 l = len(d)
839 l = len(d)
831
840
832 # is it in the cache?
841 # is it in the cache?
833 cachestart = offset - o
842 cachestart = offset - o
834 cacheend = cachestart + length
843 cacheend = cachestart + length
835 if cachestart >= 0 and cacheend <= l:
844 if cachestart >= 0 and cacheend <= l:
836 if cachestart == 0 and cacheend == l:
845 if cachestart == 0 and cacheend == l:
837 return d # avoid a copy
846 return d # avoid a copy
838 return util.buffer(d, cachestart, cacheend - cachestart)
847 return util.buffer(d, cachestart, cacheend - cachestart)
839
848
840 return self._loadchunk(offset, length)
849 return self._loadchunk(offset, length)
841
850
842 def _chunkraw(self, startrev, endrev):
851 def _chunkraw(self, startrev, endrev):
843 start = self.start(startrev)
852 start = self.start(startrev)
844 length = self.end(endrev) - start
853 length = self.end(endrev) - start
845 if self._inline:
854 if self._inline:
846 start += (startrev + 1) * self._io.size
855 start += (startrev + 1) * self._io.size
847 return self._getchunk(start, length)
856 return self._getchunk(start, length)
848
857
849 def _chunk(self, rev):
858 def _chunk(self, rev):
850 return decompress(self._chunkraw(rev, rev))
859 return decompress(self._chunkraw(rev, rev))
851
860
852 def _chunkbase(self, rev):
861 def _chunkbase(self, rev):
853 return self._chunk(rev)
862 return self._chunk(rev)
854
863
855 def _chunkclear(self):
864 def _chunkclear(self):
856 self._chunkcache = (0, '')
865 self._chunkcache = (0, '')
857
866
858 def deltaparent(self, rev):
867 def deltaparent(self, rev):
859 """return deltaparent of the given revision"""
868 """return deltaparent of the given revision"""
860 base = self.index[rev][3]
869 base = self.index[rev][3]
861 if base == rev:
870 if base == rev:
862 return nullrev
871 return nullrev
863 elif self._generaldelta:
872 elif self._generaldelta:
864 return base
873 return base
865 else:
874 else:
866 return rev - 1
875 return rev - 1
867
876
868 def revdiff(self, rev1, rev2):
877 def revdiff(self, rev1, rev2):
869 """return or calculate a delta between two revisions"""
878 """return or calculate a delta between two revisions"""
870 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
879 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
871 return str(self._chunk(rev2))
880 return str(self._chunk(rev2))
872
881
873 return mdiff.textdiff(self.revision(rev1),
882 return mdiff.textdiff(self.revision(rev1),
874 self.revision(rev2))
883 self.revision(rev2))
875
884
876 def revision(self, nodeorrev):
885 def revision(self, nodeorrev):
877 """return an uncompressed revision of a given node or revision
886 """return an uncompressed revision of a given node or revision
878 number.
887 number.
879 """
888 """
880 if isinstance(nodeorrev, int):
889 if isinstance(nodeorrev, int):
881 rev = nodeorrev
890 rev = nodeorrev
882 node = self.node(rev)
891 node = self.node(rev)
883 else:
892 else:
884 node = nodeorrev
893 node = nodeorrev
885 rev = None
894 rev = None
886
895
887 cachedrev = None
896 cachedrev = None
888 if node == nullid:
897 if node == nullid:
889 return ""
898 return ""
890 if self._cache:
899 if self._cache:
891 if self._cache[0] == node:
900 if self._cache[0] == node:
892 return self._cache[2]
901 return self._cache[2]
893 cachedrev = self._cache[1]
902 cachedrev = self._cache[1]
894
903
895 # look up what we need to read
904 # look up what we need to read
896 text = None
905 text = None
897 if rev is None:
906 if rev is None:
898 rev = self.rev(node)
907 rev = self.rev(node)
899
908
900 # check rev flags
909 # check rev flags
901 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
910 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
902 raise RevlogError(_('incompatible revision flag %x') %
911 raise RevlogError(_('incompatible revision flag %x') %
903 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
912 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
904
913
905 # build delta chain
914 # build delta chain
906 chain = []
915 chain = []
907 index = self.index # for performance
916 index = self.index # for performance
908 generaldelta = self._generaldelta
917 generaldelta = self._generaldelta
909 iterrev = rev
918 iterrev = rev
910 e = index[iterrev]
919 e = index[iterrev]
911 while iterrev != e[3] and iterrev != cachedrev:
920 while iterrev != e[3] and iterrev != cachedrev:
912 chain.append(iterrev)
921 chain.append(iterrev)
913 if generaldelta:
922 if generaldelta:
914 iterrev = e[3]
923 iterrev = e[3]
915 else:
924 else:
916 iterrev -= 1
925 iterrev -= 1
917 e = index[iterrev]
926 e = index[iterrev]
918 chain.reverse()
927 chain.reverse()
919 base = iterrev
928 base = iterrev
920
929
921 if iterrev == cachedrev:
930 if iterrev == cachedrev:
922 # cache hit
931 # cache hit
923 text = self._cache[2]
932 text = self._cache[2]
924
933
925 # drop cache to save memory
934 # drop cache to save memory
926 self._cache = None
935 self._cache = None
927
936
928 self._chunkraw(base, rev)
937 self._chunkraw(base, rev)
929 if text is None:
938 if text is None:
930 text = str(self._chunkbase(base))
939 text = str(self._chunkbase(base))
931
940
932 bins = [self._chunk(r) for r in chain]
941 bins = [self._chunk(r) for r in chain]
933 text = mdiff.patches(text, bins)
942 text = mdiff.patches(text, bins)
934
943
935 text = self._checkhash(text, node, rev)
944 text = self._checkhash(text, node, rev)
936
945
937 self._cache = (node, rev, text)
946 self._cache = (node, rev, text)
938 return text
947 return text
939
948
940 def _checkhash(self, text, node, rev):
949 def _checkhash(self, text, node, rev):
941 p1, p2 = self.parents(node)
950 p1, p2 = self.parents(node)
942 if node != hash(text, p1, p2):
951 if node != hash(text, p1, p2):
943 raise RevlogError(_("integrity check failed on %s:%d")
952 raise RevlogError(_("integrity check failed on %s:%d")
944 % (self.indexfile, rev))
953 % (self.indexfile, rev))
945 return text
954 return text
946
955
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

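    # Illustrative sketch (not part of revlog's API): where a revision's raw
    # chunk lives while a revlog is inline. Index entries and data chunks are
    # interleaved in the .i file, so the absolute file position is the
    # logical data offset plus one index-entry-sized record per revision up
    # to and including this one. checkinlinesize() above undoes this layout
    # once start(-2) + length(-2) crosses _maxinline.
    def _inlineoffsetsketch(self, rev):
        assert self._inline
        return self.start(rev) + (rev + 1) * self._io.size
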
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so the data can be read back
            # through revision()
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            if self._generaldelta:
                dist = l + self.chainlength(rev)
                base = rev
            else:
                dist = l + offset - self.start(chainbase)
                base = chainbase
            return dist, l, data, base, chainbase

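        # builddelta's "dist" feeds the snapshot decision below: once a chain
        # costs more than twice the uncompressed text, a full version is
        # stored instead. The classic branch measures dist as
        # l + offset - self.start(chainbase), the byte span of the data file
        # from the chain base to the end of the new delta. With generaldelta
        # the base can be an arbitrary earlier revision, so that span may
        # include unrelated revisions and grossly overstate the chain cost;
        # this is the distance bug the present commit fixes by summing the
        # chain's own compressed lengths via self.chainlength(rev), defined
        # earlier in this file. A minimal sketch of such a helper, assuming
        # the index layout used here (e[1] = compressed length, e[3] = delta
        # base) -- illustrative only, not the actual implementation:
        def _chainlengthsketch(rev):
            length = 0
            index = self.index
            iterrev = rev
            e = index[iterrev]
            while iterrev != e[3]:
                length += e[1]
                if self._generaldelta:
                    iterrev = e[3]
                else:
                    iterrev -= 1
                e = index[iterrev]
            return length + e[1]  # include the base snapshot's length
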
        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        flags = 0
        d = None
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if d is None or dist > textlen * 2:
            text = buildtext()
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

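    # Illustrative sketch (not part of revlog's API): a named view of the
    # 8-tuple index entry assembled in _addrevision above, for reference.
    def _indexentrysketch(self, rev):
        e = self.index[rev]
        return {'offset_flags': e[0],   # data offset packed with flag bits
                'complen': e[1],        # compressed length on disk
                'textlen': e[2],        # uncompressed text length
                'base': e[3],           # delta base revision
                'link': e[4],           # linkrev into the changelog
                'p1rev': e[5], 'p2rev': e[6],
                'node': e[7]}
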
    def group(self, nodelist, bundler, reorder=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0]; the receiver is
        guaranteed to have this parent, as it has all history before
        these changesets. If the first parent is nullrev, the
        changegroup starts with a full revision.
        """

        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield bundler.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (self._generaldelta and reorder is not False) or reorder:
            dag = dagutil.revlogdag(self)
            revs = set(self.rev(n) for n in nodelist)
            revs = dag.linearize(revs)
        else:
            revs = sorted([self.rev(n) for n in nodelist])

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for r in xrange(len(revs) - 1):
            prev, curr = revs[r], revs[r + 1]
            for c in bundler.revchunk(self, curr, prev):
                yield c

        yield bundler.close()

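    # Illustrative sketch (not part of revlog's API): the delta pairing that
    # group() performs above, without the bundler plumbing. Each revision is
    # delta'd against its predecessor in the (possibly linearized) list, and
    # the first parent of the first revision is prepended so the receiver
    # always holds the first delta's base.
    def _grouppairssketch(self, nodelist):
        if not nodelist:
            return []
        revs = sorted(self.rev(n) for n in nodelist)
        revs.insert(0, self.parentrevs(revs[0])[0])
        return zip(revs, revs[1:])  # (delta base rev, rev) pairs
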
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

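    # Illustrative sketch (not part of revlog's API): the precondition that
    # addgroup enforces above for every incoming chunk -- both parents and
    # the delta base must already be present locally, otherwise the delta
    # cannot be resolved and a LookupError is raised.
    def _chunkapplicablesketch(self, p1, p2, deltabase):
        nodemap = self.nodemap
        return (p1 in nodemap and p2 in nodemap and deltabase in nodemap)
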
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

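    # Illustrative sketch (not part of revlog's API): the index truncation
    # offset that strip() computes above. A split revlog keeps one fixed-size
    # entry per surviving revision; an inline revlog interleaves those
    # entries with the surviving data, so both contributions add up.
    def _striptruncsketch(self, rev):
        if self._inline:
            return self.start(rev) + rev * self._io.size
        return rev * self._io.size
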
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

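    # Illustrative sketch (not part of revlog's API): interpreting
    # checksize's return value. dd is the difference between the data file's
    # actual and expected size (nonzero after an interrupted write), and di
    # is the number of bytes by which the index deviates from a whole number
    # of entries (plus, for inline revlogs, their data chunks).
    def _sizesoksketch(self):
        dd, di = self.checksize()
        return dd == 0 and di == 0
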
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res