revlog: improve documentation...
Gregory Szorc - r27070:7860366b default
@@ -1,1684 +1,1737 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
import collections
from node import bin, hex, nullid, nullrev
from i18n import _
import ancestor, mdiff, parsers, error, util, templatefilters
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_DEFAULT_FLAGS = 0
REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

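# Illustrative sketch (editor's addition, not part of the original commit):
# the first field of an index entry packs a 48-bit byte offset and 16 bits
# of per-revision flags into a single integer, which the helpers above
# split apart again. ``_demo_offset_type`` is a hypothetical helper that
# exists only for this example.
def _demo_offset_type():
    packed = offset_type(1024, REVIDX_ISCENSORED)
    assert getoffset(packed) == 1024
    assert gettype(packed) == REVIDX_ISCENSORED
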
_nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # neither of the parent nodes is nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

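# Illustrative sketch (editor's addition): the sort above makes the node id
# independent of parent order, and a null p2 takes the precomputed-hash fast
# path. ``_demo_hash`` is a hypothetical helper for this example only.
def _demo_hash():
    a, b = chr(1) * 20, chr(2) * 20
    assert hash('text', a, b) == hash('text', b, a)
    # with a null second parent the digest is sha1(nullid + p1 + text)
    assert hash('text', a, nullid) == _sha(nullid + a + 'text').digest()
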
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        try:
            return _decompress(bin)
        except zlib.error as e:
            raise RevlogError(_("revlog decompress error: %s") % str(e))
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

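# Illustrative sketch (editor's addition): stored chunks are tagged by their
# first byte: 'x' marks zlib data (zlib streams happen to begin with 'x'),
# 'u' marks an uncompressed chunk behind a one-byte header, and '\0' or an
# empty string passes through untouched. ``_demo_decompress`` is hypothetical.
def _demo_decompress():
    assert decompress('') == ''
    assert decompress('u' + 'raw data') == 'raw data'
    assert decompress(_compress('some text')) == 'some text'
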
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

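# Illustrative sketch (editor's addition): struct.calcsize confirms the v0
# record layout described above: four 4-byte ints plus three 20-byte hashes.
def _demo_indexformatv0_size():
    assert struct.calcsize(indexformatv0) == 4 * 4 + 3 * 20  # 76 bytes
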
class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

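# Illustrative sketch (editor's addition): the ng record is a fixed 64 bytes;
# the 20-byte sha1 node is padded to 32 bytes by the trailing "12x".
def _demo_indexformatng_size():
    assert struct.calcsize(indexformatng) == 64
    assert struct.calcsize(versionformat) == 4
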
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

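# Illustrative sketch (editor's addition): for the first revision, packentry
# splices the revlog version/flags word over the high bytes of the offset
# field, which are always zero for the entry at the start of the file. The
# demo values below are made up for this example.
def _demo_packentry_version():
    entry = (offset_type(0, 0), 10, 10, 0, 0, -1, -1, '\0' * 20)
    packed = revlogio().packentry(entry, None, REVLOG_DEFAULT_VERSION, 0)
    assert _unpack(versionformat, packed[:4])[0] == REVLOG_DEFAULT_VERSION
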
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # 2-tuple of (rev, baserev) defining the base revision the delta chain
        # begins at for a revision.
        self._basecache = None
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
294 """iterate over all rev in this revlog (from start to stop)"""
301 """iterate over all rev in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

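    # Illustrative note (editor's addition): when start > stop the iteration
    # runs backwards and remains inclusive of stop, e.g. revs(5, 2) yields
    # 5, 4, 3, 2, because stop is shifted by one step before xrange is built.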
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def chainbase(self, rev):
        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]
        return base
    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

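    # Illustrative note (editor's addition): with generaldelta each step
    # follows the stored base (e[3]); without it the chain implicitly runs
    # against the previous revision, so the walk simply decrements. For an
    # index where rev 5 deltas against rev 3 and rev 3 is a full snapshot
    # (index[3][3] == 3), chainlen(5) is 1 under generaldelta.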
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(r) for r in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

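    # Illustrative note (editor's addition): the scan above visits revisions
    # in increasing order, relying on the revlog invariant that parents
    # always have lower revision numbers than their children, so a single
    # forward pass suffices to grow the reachable set.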
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return map(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

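    # Illustrative note (editor's addition): _match resolves, in order, an
    # integer revision, a 20-byte binary node, a decimal revision string
    # (including negative offsets from tip), and finally a full 40-digit hex
    # node; anything else falls through to None for _partialmatch to handle.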
    def _partialmatch(self, id):
        try:
            n = self.index.partialmatch(id)
            if n and self.hasnode(n):
                return n
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fall through to slow path that filters hidden revisions
            pass
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

906 def lookup(self, id):
913 def lookup(self, id):
907 """locate a node based on:
914 """locate a node based on:
908 - revision number or str(revision number)
915 - revision number or str(revision number)
909 - nodeid or subset of hex nodeid
916 - nodeid or subset of hex nodeid
910 """
917 """
911 n = self._match(id)
918 n = self._match(id)
912 if n is not None:
919 if n is not None:
913 return n
920 return n
914 n = self._partialmatch(id)
921 n = self._partialmatch(id)
915 if n:
922 if n:
916 return n
923 return n
917
924
918 raise LookupError(id, self.indexfile, _('no match found'))
925 raise LookupError(id, self.indexfile, _('no match found'))
919
926
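# Illustrative lookups on a hypothetical revlog instance ``rl`` (not
# part of this module):
#
#   rl.lookup('2')      # -> node of revision 2
#   rl.lookup('-1')     # -> node of the tip revision
#   rl.lookup('e3f4')   # -> the unique node whose hex form starts with
#                       #    'e3f4'; raises LookupError if the prefix
#                       #    is ambiguous or matches nothing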
920 def cmp(self, node, text):
927 def cmp(self, node, text):
921 """compare text with a given file revision
928 """compare text with a given file revision
922
929
923 returns True if text is different than what is stored.
930 returns True if text is different than what is stored.
924 """
931 """
925 p1, p2 = self.parents(node)
932 p1, p2 = self.parents(node)
926 return hash(text, p1, p2) != node
933 return hash(text, p1, p2) != node
927
934
928 def _addchunk(self, offset, data):
935 def _addchunk(self, offset, data):
936 """Add a segment to the revlog cache.
937
938 Accepts an absolute offset and the data that is at that location.
939 """
929 o, d = self._chunkcache
940 o, d = self._chunkcache
930 # try to add to existing cache
941 # try to add to existing cache
931 if o + len(d) == offset and len(d) + len(data) < _chunksize:
942 if o + len(d) == offset and len(d) + len(data) < _chunksize:
932 self._chunkcache = o, d + data
943 self._chunkcache = o, d + data
933 else:
944 else:
934 self._chunkcache = offset, data
945 self._chunkcache = offset, data
935
946
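# Illustrative behaviour of the append-vs-replace rule above
# (hypothetical values; both stay well under the 1 MiB _chunksize cap):
#
#   rl._chunkcache = (0, 'abcd')
#   rl._addchunk(4, 'ef')     # contiguous: cache becomes (0, 'abcdef')
#   rl._addchunk(100, 'xy')   # gap: cache is replaced by (100, 'xy')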
936 def _loadchunk(self, offset, length, df=None):
947 def _loadchunk(self, offset, length, df=None):
937 """Load a chunk/segment from the revlog.
948 """Load a segment of raw data from the revlog.
938
949
939 Accepts absolute offset, length to read, and an optional existing
950 Accepts an absolute offset, length to read, and an optional existing
940 file handle to read from.
951 file handle to read from.
941
952
942 If an existing file handle is passed, it will be seeked and the
953 If an existing file handle is passed, it will be seeked and the
943 original seek position will NOT be restored.
954 original seek position will NOT be restored.
955
956 Returns a str or buffer of raw byte data.
944 """
957 """
945 if df is not None:
958 if df is not None:
946 closehandle = False
959 closehandle = False
947 else:
960 else:
948 if self._inline:
961 if self._inline:
949 df = self.opener(self.indexfile)
962 df = self.opener(self.indexfile)
950 else:
963 else:
951 df = self.opener(self.datafile)
964 df = self.opener(self.datafile)
952 closehandle = True
965 closehandle = True
953
966
954 # Cache data both forward and backward around the requested
967 # Cache data both forward and backward around the requested
955 # data, in a fixed size window. This helps speed up operations
968 # data, in a fixed size window. This helps speed up operations
956 # involving reading the revlog backwards.
969 # involving reading the revlog backwards.
957 cachesize = self._chunkcachesize
970 cachesize = self._chunkcachesize
958 realoffset = offset & ~(cachesize - 1)
971 realoffset = offset & ~(cachesize - 1)
959 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
972 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
960 - realoffset)
973 - realoffset)
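# Worked example of the rounding above (assuming the default 64 KiB
# cache size; cachesize must be a power of two for the masking to
# work):
#   cachesize  = 65536
#   offset, length = 100000, 1000
#   realoffset = 100000 & ~65535                      # -> 65536
#   reallength = ((101000 + 65536) & ~65535) - 65536  # -> 65536
# i.e. one aligned 64 KiB read covering [65536, 131072) satisfies the
# request for [100000, 101000) with slack on both sides.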
961 df.seek(realoffset)
974 df.seek(realoffset)
962 d = df.read(reallength)
975 d = df.read(reallength)
963 if closehandle:
976 if closehandle:
964 df.close()
977 df.close()
965 self._addchunk(realoffset, d)
978 self._addchunk(realoffset, d)
966 if offset != realoffset or reallength != length:
979 if offset != realoffset or reallength != length:
967 return util.buffer(d, offset - realoffset, length)
980 return util.buffer(d, offset - realoffset, length)
968 return d
981 return d
969
982
970 def _getchunk(self, offset, length, df=None):
983 def _getchunk(self, offset, length, df=None):
984 """Obtain a segment of raw data from the revlog.
985
986 Accepts an absolute offset, length of bytes to obtain, and an
987 optional file handle to the already-opened revlog. If the file
988 handle is used, its original seek position will not be preserved.
989
990 Requests for data may be returned from a cache.
991
992 Returns a str or a buffer instance of raw byte data.
993 """
971 o, d = self._chunkcache
994 o, d = self._chunkcache
972 l = len(d)
995 l = len(d)
973
996
974 # is it in the cache?
997 # is it in the cache?
975 cachestart = offset - o
998 cachestart = offset - o
976 cacheend = cachestart + length
999 cacheend = cachestart + length
977 if cachestart >= 0 and cacheend <= l:
1000 if cachestart >= 0 and cacheend <= l:
978 if cachestart == 0 and cacheend == l:
1001 if cachestart == 0 and cacheend == l:
979 return d # avoid a copy
1002 return d # avoid a copy
980 return util.buffer(d, cachestart, cacheend - cachestart)
1003 return util.buffer(d, cachestart, cacheend - cachestart)
981
1004
982 return self._loadchunk(offset, length, df=df)
1005 return self._loadchunk(offset, length, df=df)
983
1006
984 def _chunkraw(self, startrev, endrev, df=None):
1007 def _chunkraw(self, startrev, endrev, df=None):
1008 """Obtain a segment of raw data corresponding to a range of revisions.
1009
1010 Accepts the start and end revisions and an optional already-open
1011 file handle to be used for reading. If the file handle is used, its
1012 seek position will not be preserved.
1013
1014 Requests for data may be satisfied by a cache.
1015
1016 Returns a str or a buffer instance of raw byte data. Callers will
1017 need to call ``self.start(rev)`` and ``self.length(rev)`` to determine
1018 where each revision's data begins and ends.
1019 """
985 start = self.start(startrev)
1020 start = self.start(startrev)
986 end = self.end(endrev)
1021 end = self.end(endrev)
987 if self._inline:
1022 if self._inline:
988 start += (startrev + 1) * self._io.size
1023 start += (startrev + 1) * self._io.size
989 end += (endrev + 1) * self._io.size
1024 end += (endrev + 1) * self._io.size
990 length = end - start
1025 length = end - start
991 return self._getchunk(start, length, df=df)
1026 return self._getchunk(start, length, df=df)
992
1027
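# Sketch of consuming _chunkraw() output for a non-inline revlog
# (hypothetical caller; this mirrors what _chunks() does below):
#
#   data = rl._chunkraw(startrev, endrev)
#   base = rl.start(startrev)
#   for rev in xrange(startrev, endrev + 1):
#       chunk = util.buffer(data, rl.start(rev) - base, rl.length(rev))
#       # each chunk is still compressed; see _chunk()/decompress()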
993 def _chunk(self, rev, df=None):
1028 def _chunk(self, rev, df=None):
1029 """Obtain a single decompressed chunk for a revision.
1030
1031 Accepts an integer revision and an optional already-open file handle
1032 to be used for reading. If used, the seek position of the file will not
1033 be preserved.
1034
1035 Returns a str holding uncompressed data for the requested revision.
1036 """
994 return decompress(self._chunkraw(rev, rev, df=df))
1037 return decompress(self._chunkraw(rev, rev, df=df))
995
1038
996 def _chunks(self, revs, df=None):
1039 def _chunks(self, revs, df=None):
997 '''faster version of [self._chunk(rev) for rev in revs]
1040 """Obtain decompressed chunks for the specified revisions.
998
1041
999 Assumes that revs is in ascending order.'''
1042 Accepts a sequence of numeric revisions that are assumed to be in
1043 ascending order. Also accepts an optional already-open file handle
1044 to be used for reading. If used, the seek position of the file will
1045 not be preserved.
1046
1047 This function is similar to calling ``self._chunk()`` multiple times,
1048 but is faster.
1049
1050 Returns a list with decompressed data for each requested revision.
1051 """
1000 if not revs:
1052 if not revs:
1001 return []
1053 return []
1002 start = self.start
1054 start = self.start
1003 length = self.length
1055 length = self.length
1004 inline = self._inline
1056 inline = self._inline
1005 iosize = self._io.size
1057 iosize = self._io.size
1006 buffer = util.buffer
1058 buffer = util.buffer
1007
1059
1008 l = []
1060 l = []
1009 ladd = l.append
1061 ladd = l.append
1010
1062
1011 # preload the cache
1063 # preload the cache
1012 try:
1064 try:
1013 while True:
1065 while True:
1014 # ensure that the cache doesn't change out from under us
1066 # ensure that the cache doesn't change out from under us
1015 _cache = self._chunkcache
1067 _cache = self._chunkcache
1016 self._chunkraw(revs[0], revs[-1], df=df)
1068 self._chunkraw(revs[0], revs[-1], df=df)
1017 if _cache == self._chunkcache:
1069 if _cache == self._chunkcache:
1018 break
1070 break
1019 offset, data = _cache
1071 offset, data = _cache
1020 except OverflowError:
1072 except OverflowError:
1021 # issue4215 - we can't cache a run of chunks greater than
1073 # issue4215 - we can't cache a run of chunks greater than
1022 # 2G on Windows
1074 # 2G on Windows
1023 return [self._chunk(rev, df=df) for rev in revs]
1075 return [self._chunk(rev, df=df) for rev in revs]
1024
1076
1025 for rev in revs:
1077 for rev in revs:
1026 chunkstart = start(rev)
1078 chunkstart = start(rev)
1027 if inline:
1079 if inline:
1028 chunkstart += (rev + 1) * iosize
1080 chunkstart += (rev + 1) * iosize
1029 chunklength = length(rev)
1081 chunklength = length(rev)
1030 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1082 ladd(decompress(buffer(data, chunkstart - offset, chunklength)))
1031
1083
1032 return l
1084 return l
1033
1085
1034 def _chunkclear(self):
1086 def _chunkclear(self):
1087 """Clear the raw chunk cache."""
1035 self._chunkcache = (0, '')
1088 self._chunkcache = (0, '')
1036
1089
1037 def deltaparent(self, rev):
1090 def deltaparent(self, rev):
1038 """return deltaparent of the given revision"""
1091 """return deltaparent of the given revision"""
1039 base = self.index[rev][3]
1092 base = self.index[rev][3]
1040 if base == rev:
1093 if base == rev:
1041 return nullrev
1094 return nullrev
1042 elif self._generaldelta:
1095 elif self._generaldelta:
1043 return base
1096 return base
1044 else:
1097 else:
1045 return rev - 1
1098 return rev - 1
1046
1099
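# Illustrative contrast (hypothetical index contents): with general
# delta enabled, the stored base may be any earlier revision, so
# deltaparent(5) could return e.g. 2; without it, deltas always chain
# sequentially, so deltaparent(5) is 4. In either layout a full
# snapshot (base == rev) reports nullrev.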
1047 def revdiff(self, rev1, rev2):
1100 def revdiff(self, rev1, rev2):
1048 """return or calculate a delta between two revisions"""
1101 """return or calculate a delta between two revisions"""
1049 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1102 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1050 return str(self._chunk(rev2))
1103 return str(self._chunk(rev2))
1051
1104
1052 return mdiff.textdiff(self.revision(rev1),
1105 return mdiff.textdiff(self.revision(rev1),
1053 self.revision(rev2))
1106 self.revision(rev2))
1054
1107
1055 def revision(self, nodeorrev, _df=None):
1108 def revision(self, nodeorrev, _df=None):
1056 """return an uncompressed revision of a given node or revision
1109 """return an uncompressed revision of a given node or revision
1057 number.
1110 number.
1058
1111
1059 _df is an existing file handle to read from. It is meant to only be
1112 _df is an existing file handle to read from. It is meant to only be
1060 used internally.
1113 used internally.
1061 """
1114 """
1062 if isinstance(nodeorrev, int):
1115 if isinstance(nodeorrev, int):
1063 rev = nodeorrev
1116 rev = nodeorrev
1064 node = self.node(rev)
1117 node = self.node(rev)
1065 else:
1118 else:
1066 node = nodeorrev
1119 node = nodeorrev
1067 rev = None
1120 rev = None
1068
1121
1069 cachedrev = None
1122 cachedrev = None
1070 if node == nullid:
1123 if node == nullid:
1071 return ""
1124 return ""
1072 if self._cache:
1125 if self._cache:
1073 if self._cache[0] == node:
1126 if self._cache[0] == node:
1074 return self._cache[2]
1127 return self._cache[2]
1075 cachedrev = self._cache[1]
1128 cachedrev = self._cache[1]
1076
1129
1077 # look up what we need to read
1130 # look up what we need to read
1078 text = None
1131 text = None
1079 if rev is None:
1132 if rev is None:
1080 rev = self.rev(node)
1133 rev = self.rev(node)
1081
1134
1082 # check rev flags
1135 # check rev flags
1083 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1136 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1084 raise RevlogError(_('incompatible revision flag %x') %
1137 raise RevlogError(_('incompatible revision flag %x') %
1085 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1138 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1086
1139
1087 # build delta chain
1140 # build delta chain
1088 chain = []
1141 chain = []
1089 index = self.index # for performance
1142 index = self.index # for performance
1090 generaldelta = self._generaldelta
1143 generaldelta = self._generaldelta
1091 iterrev = rev
1144 iterrev = rev
1092 e = index[iterrev]
1145 e = index[iterrev]
1093 while iterrev != e[3] and iterrev != cachedrev:
1146 while iterrev != e[3] and iterrev != cachedrev:
1094 chain.append(iterrev)
1147 chain.append(iterrev)
1095 if generaldelta:
1148 if generaldelta:
1096 iterrev = e[3]
1149 iterrev = e[3]
1097 else:
1150 else:
1098 iterrev -= 1
1151 iterrev -= 1
1099 e = index[iterrev]
1152 e = index[iterrev]
1100
1153
1101 if iterrev == cachedrev:
1154 if iterrev == cachedrev:
1102 # cache hit
1155 # cache hit
1103 text = self._cache[2]
1156 text = self._cache[2]
1104 else:
1157 else:
1105 chain.append(iterrev)
1158 chain.append(iterrev)
1106 chain.reverse()
1159 chain.reverse()
1107
1160
1108 # drop cache to save memory
1161 # drop cache to save memory
1109 self._cache = None
1162 self._cache = None
1110
1163
1111 bins = self._chunks(chain, df=_df)
1164 bins = self._chunks(chain, df=_df)
1112 if text is None:
1165 if text is None:
1113 text = str(bins[0])
1166 text = str(bins[0])
1114 bins = bins[1:]
1167 bins = bins[1:]
1115
1168
1116 text = mdiff.patches(text, bins)
1169 text = mdiff.patches(text, bins)
1117
1170
1118 text = self._checkhash(text, node, rev)
1171 text = self._checkhash(text, node, rev)
1119
1172
1120 self._cache = (node, rev, text)
1173 self._cache = (node, rev, text)
1121 return text
1174 return text
1122
1175
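# Minimal sketch of the delta resolution above, ignoring the one-entry
# cache, flag checks, and batched chunk reads (hypothetical helper):
#
#   def resolvetext(rl, rev):
#       chain = []
#       while rl.deltaparent(rev) != nullrev:
#           chain.append(rev)
#           rev = rl.deltaparent(rev)
#       text = str(rl._chunk(rev))   # the full-snapshot base
#       return mdiff.patches(text,
#                            [rl._chunk(r) for r in reversed(chain)])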
1123 def hash(self, text, p1, p2):
1176 def hash(self, text, p1, p2):
1124 """Compute a node hash.
1177 """Compute a node hash.
1125
1178
1126 Available as a function so that subclasses can replace the hash
1179 Available as a function so that subclasses can replace the hash
1127 as needed.
1180 as needed.
1128 """
1181 """
1129 return hash(text, p1, p2)
1182 return hash(text, p1, p2)
1130
1183
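# For reference, the module-level hash() used here computes the classic
# Mercurial nodeid: SHA-1 over the sorted parent nodes followed by the
# text, roughly:
#
#   s = _sha(min(p1, p2))
#   s.update(max(p1, p2))
#   s.update(text)
#   return s.digest()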
1131 def _checkhash(self, text, node, rev):
1184 def _checkhash(self, text, node, rev):
1132 p1, p2 = self.parents(node)
1185 p1, p2 = self.parents(node)
1133 self.checkhash(text, p1, p2, node, rev)
1186 self.checkhash(text, p1, p2, node, rev)
1134 return text
1187 return text
1135
1188
1136 def checkhash(self, text, p1, p2, node, rev=None):
1189 def checkhash(self, text, p1, p2, node, rev=None):
1137 if node != self.hash(text, p1, p2):
1190 if node != self.hash(text, p1, p2):
1138 revornode = rev
1191 revornode = rev
1139 if revornode is None:
1192 if revornode is None:
1140 revornode = templatefilters.short(hex(node))
1193 revornode = templatefilters.short(hex(node))
1141 raise RevlogError(_("integrity check failed on %s:%s")
1194 raise RevlogError(_("integrity check failed on %s:%s")
1142 % (self.indexfile, revornode))
1195 % (self.indexfile, revornode))
1143
1196
1144 def checkinlinesize(self, tr, fp=None):
1197 def checkinlinesize(self, tr, fp=None):
1145 """Check if the revlog is too big for inline and convert if so.
1198 """Check if the revlog is too big for inline and convert if so.
1146
1199
1147 This should be called after revisions are added to the revlog. If the
1200 This should be called after revisions are added to the revlog. If the
1148 revlog has grown too large to be an inline revlog, it will convert it
1201 revlog has grown too large to be an inline revlog, it will convert it
1149 to use multiple index and data files.
1202 to use multiple index and data files.
1150 """
1203 """
1151 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1204 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1152 return
1205 return
1153
1206
1154 trinfo = tr.find(self.indexfile)
1207 trinfo = tr.find(self.indexfile)
1155 if trinfo is None:
1208 if trinfo is None:
1156 raise RevlogError(_("%s not found in the transaction")
1209 raise RevlogError(_("%s not found in the transaction")
1157 % self.indexfile)
1210 % self.indexfile)
1158
1211
1159 trindex = trinfo[2]
1212 trindex = trinfo[2]
1160 if trindex is not None:
1213 if trindex is not None:
1161 dataoff = self.start(trindex)
1214 dataoff = self.start(trindex)
1162 else:
1215 else:
1163 # revlog was stripped at start of transaction, use all leftover data
1216 # revlog was stripped at start of transaction, use all leftover data
1164 trindex = len(self) - 1
1217 trindex = len(self) - 1
1165 dataoff = self.end(-2)
1218 dataoff = self.end(-2)
1166
1219
1167 tr.add(self.datafile, dataoff)
1220 tr.add(self.datafile, dataoff)
1168
1221
1169 if fp:
1222 if fp:
1170 fp.flush()
1223 fp.flush()
1171 fp.close()
1224 fp.close()
1172
1225
1173 df = self.opener(self.datafile, 'w')
1226 df = self.opener(self.datafile, 'w')
1174 try:
1227 try:
1175 for r in self:
1228 for r in self:
1176 df.write(self._chunkraw(r, r))
1229 df.write(self._chunkraw(r, r))
1177 finally:
1230 finally:
1178 df.close()
1231 df.close()
1179
1232
1180 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1233 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1181 self.version &= ~(REVLOGNGINLINEDATA)
1234 self.version &= ~(REVLOGNGINLINEDATA)
1182 self._inline = False
1235 self._inline = False
1183 for i in self:
1236 for i in self:
1184 e = self._io.packentry(self.index[i], self.node, self.version, i)
1237 e = self._io.packentry(self.index[i], self.node, self.version, i)
1185 fp.write(e)
1238 fp.write(e)
1186
1239
1187 # if we don't call close, the temp file will never replace the
1240 # if we don't call close, the temp file will never replace the
1188 # real index
1241 # real index
1189 fp.close()
1242 fp.close()
1190
1243
1191 tr.replace(self.indexfile, trindex * self._io.size)
1244 tr.replace(self.indexfile, trindex * self._io.size)
1192 self._chunkclear()
1245 self._chunkclear()
1193
1246
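# Illustrative trigger (using the module constants): with _maxinline at
# 131072, an inline revlog is split into separate index and data files
# once the end of its second-to-last revision reaches 128 KiB.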
1194 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1247 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1195 node=None):
1248 node=None):
1196 """add a revision to the log
1249 """add a revision to the log
1197
1250
1198 text - the revision data to add
1251 text - the revision data to add
1199 transaction - the transaction object used for rollback
1252 transaction - the transaction object used for rollback
1200 link - the linkrev data to add
1253 link - the linkrev data to add
1201 p1, p2 - the parent nodeids of the revision
1254 p1, p2 - the parent nodeids of the revision
1202 cachedelta - an optional precomputed delta
1255 cachedelta - an optional precomputed delta
1203 node - nodeid of revision; typically node is not specified, and it is
1256 node - nodeid of revision; typically node is not specified, and it is
1204 computed by default as hash(text, p1, p2), however subclasses might
1257 computed by default as hash(text, p1, p2), however subclasses might
1205 use a different hashing method (and override checkhash() in that case)
1258 use a different hashing method (and override checkhash() in that case)
1206 """
1259 """
1207 if link == nullrev:
1260 if link == nullrev:
1208 raise RevlogError(_("attempted to add linkrev -1 to %s")
1261 raise RevlogError(_("attempted to add linkrev -1 to %s")
1209 % self.indexfile)
1262 % self.indexfile)
1210
1263
1211 if len(text) > _maxentrysize:
1264 if len(text) > _maxentrysize:
1212 raise RevlogError(
1265 raise RevlogError(
1213 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1266 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1214 % (self.indexfile, len(text)))
1267 % (self.indexfile, len(text)))
1215
1268
1216 node = node or self.hash(text, p1, p2)
1269 node = node or self.hash(text, p1, p2)
1217 if node in self.nodemap:
1270 if node in self.nodemap:
1218 return node
1271 return node
1219
1272
1220 dfh = None
1273 dfh = None
1221 if not self._inline:
1274 if not self._inline:
1222 dfh = self.opener(self.datafile, "a+")
1275 dfh = self.opener(self.datafile, "a+")
1223 ifh = self.opener(self.indexfile, "a+")
1276 ifh = self.opener(self.indexfile, "a+")
1224 try:
1277 try:
1225 return self._addrevision(node, text, transaction, link, p1, p2,
1278 return self._addrevision(node, text, transaction, link, p1, p2,
1226 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1279 REVIDX_DEFAULT_FLAGS, cachedelta, ifh, dfh)
1227 finally:
1280 finally:
1228 if dfh:
1281 if dfh:
1229 dfh.close()
1282 dfh.close()
1230 ifh.close()
1283 ifh.close()
1231
1284
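# Illustrative call (hypothetical names; ``tr`` is an open transaction
# and p1node/p2node already exist in this revlog):
#
#   node = rl.addrevision("new contents\n", tr, linkrev, p1node, p2node)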
1232 def compress(self, text):
1285 def compress(self, text):
1233 """ generate a possibly-compressed representation of text """
1286 """ generate a possibly-compressed representation of text """
1234 if not text:
1287 if not text:
1235 return ("", text)
1288 return ("", text)
1236 l = len(text)
1289 l = len(text)
1237 bin = None
1290 bin = None
1238 if l < 44:
1291 if l < 44:
1239 pass
1292 pass
1240 elif l > 1000000:
1293 elif l > 1000000:
1241 # zlib makes an internal copy, thus doubling memory usage for
1294 # zlib makes an internal copy, thus doubling memory usage for
1242 # large files, so let's do this in pieces
1295 # large files, so let's do this in pieces
1243 z = zlib.compressobj()
1296 z = zlib.compressobj()
1244 p = []
1297 p = []
1245 pos = 0
1298 pos = 0
1246 while pos < l:
1299 while pos < l:
1247 pos2 = pos + 2**20
1300 pos2 = pos + 2**20
1248 p.append(z.compress(text[pos:pos2]))
1301 p.append(z.compress(text[pos:pos2]))
1249 pos = pos2
1302 pos = pos2
1250 p.append(z.flush())
1303 p.append(z.flush())
1251 if sum(map(len, p)) < l:
1304 if sum(map(len, p)) < l:
1252 bin = "".join(p)
1305 bin = "".join(p)
1253 else:
1306 else:
1254 bin = _compress(text)
1307 bin = _compress(text)
1255 if bin is None or len(bin) > l:
1308 if bin is None or len(bin) > l:
1256 if text[0] == '\0':
1309 if text[0] == '\0':
1257 return ("", text)
1310 return ("", text)
1258 return ('u', text)
1311 return ('u', text)
1259 return ("", bin)
1312 return ("", bin)
1260
1313
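# The empty-vs-'u' prefix pairs with the module-level decompress():
# zlib streams start with 'x', literal text that happens to begin with
# NUL is stored as-is, and 'u' flags text kept uncompressed because
# compression did not pay off; roughly:
#
#   t = bin[0]
#   if t == '\0': return bin              # uncompressed, starts with NUL
#   if t == 'x': return _decompress(bin)  # zlib stream
#   if t == 'u': return bin[1:]           # stored fulltext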
1261 def _isgooddelta(self, d, textlen):
1314 def _isgooddelta(self, d, textlen):
1262 """Returns True if the given delta is good. Good means that it is within
1315 """Returns True if the given delta is good. Good means that it is within
1263 the disk span, disk size, and chain length bounds that we know to be
1316 the disk span, disk size, and chain length bounds that we know to be
1264 performant."""
1317 performant."""
1265 if d is None:
1318 if d is None:
1266 return False
1319 return False
1267
1320
1268 # - 'dist' is the distance from the base revision -- bounding it limits
1321 # - 'dist' is the distance from the base revision -- bounding it limits
1269 # the amount of I/O we need to do.
1322 # the amount of I/O we need to do.
1270 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1323 # - 'compresseddeltalen' is the sum of the total size of deltas we need
1271 # to apply -- bounding it limits the amount of CPU we consume.
1324 # to apply -- bounding it limits the amount of CPU we consume.
1272 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1325 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1273 if (dist > textlen * 4 or l > textlen or
1326 if (dist > textlen * 4 or l > textlen or
1274 compresseddeltalen > textlen * 2 or
1327 compresseddeltalen > textlen * 2 or
1275 (self._maxchainlen and chainlen > self._maxchainlen)):
1328 (self._maxchainlen and chainlen > self._maxchainlen)):
1276 return False
1329 return False
1277
1330
1278 return True
1331 return True
1279
1332
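# Worked example (hypothetical numbers): for a 10,000 byte fulltext, a
# candidate delta is rejected when dist exceeds 40,000 (too much disk
# span to read), when the delta itself tops 10,000 bytes (a fresh
# snapshot would be cheaper), when the chain's compressed deltas sum
# past 20,000 bytes (too much CPU to apply), or when the chain is
# longer than a configured _maxchainlen.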
1280 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1333 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
1281 cachedelta, ifh, dfh, alwayscache=False):
1334 cachedelta, ifh, dfh, alwayscache=False):
1282 """internal function to add revisions to the log
1335 """internal function to add revisions to the log
1283
1336
1284 see addrevision for argument descriptions.
1337 see addrevision for argument descriptions.
1285 invariants:
1338 invariants:
1286 - text is optional (can be None); if not set, cachedelta must be set.
1339 - text is optional (can be None); if not set, cachedelta must be set.
1287 if both are set, they must correspond to each other.
1340 if both are set, they must correspond to each other.
1288 """
1341 """
1289 btext = [text]
1342 btext = [text]
1290 def buildtext():
1343 def buildtext():
1291 if btext[0] is not None:
1344 if btext[0] is not None:
1292 return btext[0]
1345 return btext[0]
1293 baserev = cachedelta[0]
1346 baserev = cachedelta[0]
1294 delta = cachedelta[1]
1347 delta = cachedelta[1]
1295 # special case deltas which replace entire base; no need to decode
1348 # special case deltas which replace entire base; no need to decode
1296 # base revision. this neatly avoids censored bases, which throw when
1349 # base revision. this neatly avoids censored bases, which throw when
1297 # they're decoded.
1350 # they're decoded.
1298 hlen = struct.calcsize(">lll")
1351 hlen = struct.calcsize(">lll")
1299 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1352 if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
1300 len(delta) - hlen):
1353 len(delta) - hlen):
1301 btext[0] = delta[hlen:]
1354 btext[0] = delta[hlen:]
1302 else:
1355 else:
1303 if self._inline:
1356 if self._inline:
1304 fh = ifh
1357 fh = ifh
1305 else:
1358 else:
1306 fh = dfh
1359 fh = dfh
1307 basetext = self.revision(self.node(baserev), _df=fh)
1360 basetext = self.revision(self.node(baserev), _df=fh)
1308 btext[0] = mdiff.patch(basetext, delta)
1361 btext[0] = mdiff.patch(basetext, delta)
1309 try:
1362 try:
1310 self.checkhash(btext[0], p1, p2, node)
1363 self.checkhash(btext[0], p1, p2, node)
1311 if flags & REVIDX_ISCENSORED:
1364 if flags & REVIDX_ISCENSORED:
1312 raise RevlogError(_('node %s is not censored') % node)
1365 raise RevlogError(_('node %s is not censored') % node)
1313 except CensoredNodeError:
1366 except CensoredNodeError:
1314 # must pass the censored index flag to add censored revisions
1367 # must pass the censored index flag to add censored revisions
1315 if not flags & REVIDX_ISCENSORED:
1368 if not flags & REVIDX_ISCENSORED:
1316 raise
1369 raise
1317 return btext[0]
1370 return btext[0]
1318
1371
1319 def builddelta(rev):
1372 def builddelta(rev):
1320 # can we use the cached delta?
1373 # can we use the cached delta?
1321 if cachedelta and cachedelta[0] == rev:
1374 if cachedelta and cachedelta[0] == rev:
1322 delta = cachedelta[1]
1375 delta = cachedelta[1]
1323 else:
1376 else:
1324 t = buildtext()
1377 t = buildtext()
1325 if self.iscensored(rev):
1378 if self.iscensored(rev):
1326 # deltas based on a censored revision must replace the
1379 # deltas based on a censored revision must replace the
1327 # full content in one patch, so delta works everywhere
1380 # full content in one patch, so delta works everywhere
1328 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1381 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
1329 delta = header + t
1382 delta = header + t
1330 else:
1383 else:
1331 if self._inline:
1384 if self._inline:
1332 fh = ifh
1385 fh = ifh
1333 else:
1386 else:
1334 fh = dfh
1387 fh = dfh
1335 ptext = self.revision(self.node(rev), _df=fh)
1388 ptext = self.revision(self.node(rev), _df=fh)
1336 delta = mdiff.textdiff(ptext, t)
1389 delta = mdiff.textdiff(ptext, t)
1337 data = self.compress(delta)
1390 data = self.compress(delta)
1338 l = len(data[1]) + len(data[0])
1391 l = len(data[1]) + len(data[0])
1339 if basecache[0] == rev:
1392 if basecache[0] == rev:
1340 chainbase = basecache[1]
1393 chainbase = basecache[1]
1341 else:
1394 else:
1342 chainbase = self.chainbase(rev)
1395 chainbase = self.chainbase(rev)
1343 dist = l + offset - self.start(chainbase)
1396 dist = l + offset - self.start(chainbase)
1344 if self._generaldelta:
1397 if self._generaldelta:
1345 base = rev
1398 base = rev
1346 else:
1399 else:
1347 base = chainbase
1400 base = chainbase
1348 chainlen, compresseddeltalen = self._chaininfo(rev)
1401 chainlen, compresseddeltalen = self._chaininfo(rev)
1349 chainlen += 1
1402 chainlen += 1
1350 compresseddeltalen += l
1403 compresseddeltalen += l
1351 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1404 return dist, l, data, base, chainbase, chainlen, compresseddeltalen
1352
1405
1353 curr = len(self)
1406 curr = len(self)
1354 prev = curr - 1
1407 prev = curr - 1
1355 base = chainbase = curr
1408 base = chainbase = curr
1356 chainlen = None
1409 chainlen = None
1357 offset = self.end(prev)
1410 offset = self.end(prev)
1358 d = None
1411 d = None
1359 if self._basecache is None:
1412 if self._basecache is None:
1360 self._basecache = (prev, self.chainbase(prev))
1413 self._basecache = (prev, self.chainbase(prev))
1361 basecache = self._basecache
1414 basecache = self._basecache
1362 p1r, p2r = self.rev(p1), self.rev(p2)
1415 p1r, p2r = self.rev(p1), self.rev(p2)
1363
1416
1364 # full versions are inserted when the needed deltas
1417 # full versions are inserted when the needed deltas
1365 # become comparable to the uncompressed text
1418 # become comparable to the uncompressed text
1366 if text is None:
1419 if text is None:
1367 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1420 textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
1368 cachedelta[1])
1421 cachedelta[1])
1369 else:
1422 else:
1370 textlen = len(text)
1423 textlen = len(text)
1371
1424
1372 # should we try to build a delta?
1425 # should we try to build a delta?
1373 if prev != nullrev:
1426 if prev != nullrev:
1374 if cachedelta and self._generaldelta and self._lazydeltabase:
1427 if cachedelta and self._generaldelta and self._lazydeltabase:
1375 # Assume what we received from the server is a good choice
1428 # Assume what we received from the server is a good choice
1376 # build delta will reuse the cache
1429 # build delta will reuse the cache
1377 d = builddelta(cachedelta[0])
1430 d = builddelta(cachedelta[0])
1378 elif self._generaldelta:
1431 elif self._generaldelta:
1379 if p2r != nullrev and self._aggressivemergedeltas:
1432 if p2r != nullrev and self._aggressivemergedeltas:
1380 d = builddelta(p1r)
1433 d = builddelta(p1r)
1381 d2 = builddelta(p2r)
1434 d2 = builddelta(p2r)
1382 p1good = self._isgooddelta(d, textlen)
1435 p1good = self._isgooddelta(d, textlen)
1383 p2good = self._isgooddelta(d2, textlen)
1436 p2good = self._isgooddelta(d2, textlen)
1384 if p1good and p2good:
1437 if p1good and p2good:
1385 # If both are good deltas, choose the smallest
1438 # If both are good deltas, choose the smallest
1386 if d2[1] < d[1]:
1439 if d2[1] < d[1]:
1387 d = d2
1440 d = d2
1388 elif p2good:
1441 elif p2good:
1389 # If only p2 is good, use it
1442 # If only p2 is good, use it
1390 d = d2
1443 d = d2
1391 elif p1good:
1444 elif p1good:
1392 pass
1445 pass
1393 else:
1446 else:
1394 # Neither is good, try against prev to hopefully save us
1447 # Neither is good, try against prev to hopefully save us
1395 # a fulltext.
1448 # a fulltext.
1396 d = builddelta(prev)
1449 d = builddelta(prev)
1397 else:
1450 else:
1398 # Pick whichever parent is closer to us (to minimize the
1451 # Pick whichever parent is closer to us (to minimize the
1399 # chance of having to build a fulltext). Since
1452 # chance of having to build a fulltext). Since
1400 # nullrev == -1, any non-merge commit will always pick p1r.
1453 # nullrev == -1, any non-merge commit will always pick p1r.
1401 drev = p2r if p2r > p1r else p1r
1454 drev = p2r if p2r > p1r else p1r
1402 d = builddelta(drev)
1455 d = builddelta(drev)
1403 # If the chosen delta will result in us making a full text,
1456 # If the chosen delta will result in us making a full text,
1404 # give it one last try against prev.
1457 # give it one last try against prev.
1405 if drev != prev and not self._isgooddelta(d, textlen):
1458 if drev != prev and not self._isgooddelta(d, textlen):
1406 d = builddelta(prev)
1459 d = builddelta(prev)
1407 else:
1460 else:
1408 d = builddelta(prev)
1461 d = builddelta(prev)
1409 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1462 dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
1410
1463
1411 if not self._isgooddelta(d, textlen):
1464 if not self._isgooddelta(d, textlen):
1412 text = buildtext()
1465 text = buildtext()
1413 data = self.compress(text)
1466 data = self.compress(text)
1414 l = len(data[1]) + len(data[0])
1467 l = len(data[1]) + len(data[0])
1415 base = chainbase = curr
1468 base = chainbase = curr
1416
1469
1417 e = (offset_type(offset, flags), l, textlen,
1470 e = (offset_type(offset, flags), l, textlen,
1418 base, link, p1r, p2r, node)
1471 base, link, p1r, p2r, node)
1419 self.index.insert(-1, e)
1472 self.index.insert(-1, e)
1420 self.nodemap[node] = curr
1473 self.nodemap[node] = curr
1421
1474
1422 entry = self._io.packentry(e, self.node, self.version, curr)
1475 entry = self._io.packentry(e, self.node, self.version, curr)
1423 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1476 self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
1424
1477
1425 if alwayscache and text is None:
1478 if alwayscache and text is None:
1426 text = buildtext()
1479 text = buildtext()
1427
1480
1428 if type(text) == str: # only accept immutable objects
1481 if type(text) == str: # only accept immutable objects
1429 self._cache = (node, curr, text)
1482 self._cache = (node, curr, text)
1430 self._basecache = (curr, chainbase)
1483 self._basecache = (curr, chainbase)
1431 return node
1484 return node
1432
1485
1433 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1486 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1434 curr = len(self) - 1
1487 curr = len(self) - 1
1435 if not self._inline:
1488 if not self._inline:
1436 transaction.add(self.datafile, offset)
1489 transaction.add(self.datafile, offset)
1437 transaction.add(self.indexfile, curr * len(entry))
1490 transaction.add(self.indexfile, curr * len(entry))
1438 if data[0]:
1491 if data[0]:
1439 dfh.write(data[0])
1492 dfh.write(data[0])
1440 dfh.write(data[1])
1493 dfh.write(data[1])
1441 ifh.write(entry)
1494 ifh.write(entry)
1442 else:
1495 else:
1443 offset += curr * self._io.size
1496 offset += curr * self._io.size
1444 transaction.add(self.indexfile, offset, curr)
1497 transaction.add(self.indexfile, offset, curr)
1445 ifh.write(entry)
1498 ifh.write(entry)
1446 ifh.write(data[0])
1499 ifh.write(data[0])
1447 ifh.write(data[1])
1500 ifh.write(data[1])
1448 self.checkinlinesize(transaction, ifh)
1501 self.checkinlinesize(transaction, ifh)
1449
1502
1450 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1503 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
1451 """
1504 """
1452 add a delta group
1505 add a delta group
1453
1506
1454 given a set of deltas, add them to the revision log. the
1507 given a set of deltas, add them to the revision log. the
1455 first delta is against its parent, which should be in our
1508 first delta is against its parent, which should be in our
1456 log, the rest are against the previous delta.
1509 log, the rest are against the previous delta.
1457
1510
1458 If ``addrevisioncb`` is defined, it will be called with arguments of
1511 If ``addrevisioncb`` is defined, it will be called with arguments of
1459 this revlog and the node that was added.
1512 this revlog and the node that was added.
1460 """
1513 """
1461
1514
1462 # track the base of the current delta log
1515 # track the base of the current delta log
1463 content = []
1516 content = []
1464 node = None
1517 node = None
1465
1518
1466 r = len(self)
1519 r = len(self)
1467 end = 0
1520 end = 0
1468 if r:
1521 if r:
1469 end = self.end(r - 1)
1522 end = self.end(r - 1)
1470 ifh = self.opener(self.indexfile, "a+")
1523 ifh = self.opener(self.indexfile, "a+")
1471 isize = r * self._io.size
1524 isize = r * self._io.size
1472 if self._inline:
1525 if self._inline:
1473 transaction.add(self.indexfile, end + isize, r)
1526 transaction.add(self.indexfile, end + isize, r)
1474 dfh = None
1527 dfh = None
1475 else:
1528 else:
1476 transaction.add(self.indexfile, isize, r)
1529 transaction.add(self.indexfile, isize, r)
1477 transaction.add(self.datafile, end)
1530 transaction.add(self.datafile, end)
1478 dfh = self.opener(self.datafile, "a+")
1531 dfh = self.opener(self.datafile, "a+")
1479 def flush():
1532 def flush():
1480 if dfh:
1533 if dfh:
1481 dfh.flush()
1534 dfh.flush()
1482 ifh.flush()
1535 ifh.flush()
1483 try:
1536 try:
1484 # loop through our set of deltas
1537 # loop through our set of deltas
1485 chain = None
1538 chain = None
1486 while True:
1539 while True:
1487 chunkdata = cg.deltachunk(chain)
1540 chunkdata = cg.deltachunk(chain)
1488 if not chunkdata:
1541 if not chunkdata:
1489 break
1542 break
1490 node = chunkdata['node']
1543 node = chunkdata['node']
1491 p1 = chunkdata['p1']
1544 p1 = chunkdata['p1']
1492 p2 = chunkdata['p2']
1545 p2 = chunkdata['p2']
1493 cs = chunkdata['cs']
1546 cs = chunkdata['cs']
1494 deltabase = chunkdata['deltabase']
1547 deltabase = chunkdata['deltabase']
1495 delta = chunkdata['delta']
1548 delta = chunkdata['delta']
1496
1549
1497 content.append(node)
1550 content.append(node)
1498
1551
1499 link = linkmapper(cs)
1552 link = linkmapper(cs)
1500 if node in self.nodemap:
1553 if node in self.nodemap:
1501 # this can happen if two branches make the same change
1554 # this can happen if two branches make the same change
1502 chain = node
1555 chain = node
1503 continue
1556 continue
1504
1557
1505 for p in (p1, p2):
1558 for p in (p1, p2):
1506 if p not in self.nodemap:
1559 if p not in self.nodemap:
1507 raise LookupError(p, self.indexfile,
1560 raise LookupError(p, self.indexfile,
1508 _('unknown parent'))
1561 _('unknown parent'))
1509
1562
1510 if deltabase not in self.nodemap:
1563 if deltabase not in self.nodemap:
1511 raise LookupError(deltabase, self.indexfile,
1564 raise LookupError(deltabase, self.indexfile,
1512 _('unknown delta base'))
1565 _('unknown delta base'))
1513
1566
1514 baserev = self.rev(deltabase)
1567 baserev = self.rev(deltabase)
1515
1568
1516 if baserev != nullrev and self.iscensored(baserev):
1569 if baserev != nullrev and self.iscensored(baserev):
1517 # if base is censored, delta must be full replacement in a
1570 # if base is censored, delta must be full replacement in a
1518 # single patch operation
1571 # single patch operation
1519 hlen = struct.calcsize(">lll")
1572 hlen = struct.calcsize(">lll")
1520 oldlen = self.rawsize(baserev)
1573 oldlen = self.rawsize(baserev)
1521 newlen = len(delta) - hlen
1574 newlen = len(delta) - hlen
1522 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1575 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
1523 raise error.CensoredBaseError(self.indexfile,
1576 raise error.CensoredBaseError(self.indexfile,
1524 self.node(baserev))
1577 self.node(baserev))
1525
1578
1526 flags = REVIDX_DEFAULT_FLAGS
1579 flags = REVIDX_DEFAULT_FLAGS
1527 if self._peek_iscensored(baserev, delta, flush):
1580 if self._peek_iscensored(baserev, delta, flush):
1528 flags |= REVIDX_ISCENSORED
1581 flags |= REVIDX_ISCENSORED
1529
1582
1530 # We assume consumers of addrevisioncb will want to retrieve
1583 # We assume consumers of addrevisioncb will want to retrieve
1531 # the added revision, which will require a call to
1584 # the added revision, which will require a call to
1532 # revision(). revision() will fast path if there is a cache
1585 # revision(). revision() will fast path if there is a cache
1533 # hit. So, we tell _addrevision() to always cache in this case.
1586 # hit. So, we tell _addrevision() to always cache in this case.
1534 chain = self._addrevision(node, None, transaction, link,
1587 chain = self._addrevision(node, None, transaction, link,
1535 p1, p2, flags, (baserev, delta),
1588 p1, p2, flags, (baserev, delta),
1536 ifh, dfh,
1589 ifh, dfh,
1537 alwayscache=bool(addrevisioncb))
1590 alwayscache=bool(addrevisioncb))
1538
1591
1539 if addrevisioncb:
1592 if addrevisioncb:
1540 addrevisioncb(self, chain)
1593 addrevisioncb(self, chain)
1541
1594
1542 if not dfh and not self._inline:
1595 if not dfh and not self._inline:
1543 # addrevision switched from inline to conventional
1596 # addrevision switched from inline to conventional
1544 # reopen the index
1597 # reopen the index
1545 ifh.close()
1598 ifh.close()
1546 dfh = self.opener(self.datafile, "a+")
1599 dfh = self.opener(self.datafile, "a+")
1547 ifh = self.opener(self.indexfile, "a+")
1600 ifh = self.opener(self.indexfile, "a+")
1548 finally:
1601 finally:
1549 if dfh:
1602 if dfh:
1550 dfh.close()
1603 dfh.close()
1551 ifh.close()
1604 ifh.close()
1552
1605
1553 return content
1606 return content
1554
1607
1555 def iscensored(self, rev):
1608 def iscensored(self, rev):
1556 """Check if a file revision is censored."""
1609 """Check if a file revision is censored."""
1557 return False
1610 return False
1558
1611
1559 def _peek_iscensored(self, baserev, delta, flush):
1612 def _peek_iscensored(self, baserev, delta, flush):
1560 """Quickly check if a delta produces a censored revision."""
1613 """Quickly check if a delta produces a censored revision."""
1561 return False
1614 return False
1562
1615
1563 def getstrippoint(self, minlink):
1616 def getstrippoint(self, minlink):
1564 """find the minimum rev that must be stripped to strip the linkrev
1617 """find the minimum rev that must be stripped to strip the linkrev
1565
1618
1566 Returns a tuple containing the minimum rev and a set of all revs that
1619 Returns a tuple containing the minimum rev and a set of all revs that
1567 have linkrevs that will be broken by this strip.
1620 have linkrevs that will be broken by this strip.
1568 """
1621 """
1569 brokenrevs = set()
1622 brokenrevs = set()
1570 strippoint = len(self)
1623 strippoint = len(self)
1571
1624
1572 heads = {}
1625 heads = {}
1573 futurelargelinkrevs = set()
1626 futurelargelinkrevs = set()
1574 for head in self.headrevs():
1627 for head in self.headrevs():
1575 headlinkrev = self.linkrev(head)
1628 headlinkrev = self.linkrev(head)
1576 heads[head] = headlinkrev
1629 heads[head] = headlinkrev
1577 if headlinkrev >= minlink:
1630 if headlinkrev >= minlink:
1578 futurelargelinkrevs.add(headlinkrev)
1631 futurelargelinkrevs.add(headlinkrev)
1579
1632
1580 # This algorithm involves walking down the rev graph, starting at the
1633 # This algorithm involves walking down the rev graph, starting at the
1581 # heads. Since the revs are topologically sorted according to linkrev,
1634 # heads. Since the revs are topologically sorted according to linkrev,
1582 # once all head linkrevs are below the minlink, we know there are
1635 # once all head linkrevs are below the minlink, we know there are
1583 # no more revs that could have a linkrev greater than minlink.
1636 # no more revs that could have a linkrev greater than minlink.
1584 # So we can stop walking.
1637 # So we can stop walking.
1585 while futurelargelinkrevs:
1638 while futurelargelinkrevs:
1586 strippoint -= 1
1639 strippoint -= 1
1587 linkrev = heads.pop(strippoint)
1640 linkrev = heads.pop(strippoint)
1588
1641
1589 if linkrev < minlink:
1642 if linkrev < minlink:
1590 brokenrevs.add(strippoint)
1643 brokenrevs.add(strippoint)
1591 else:
1644 else:
1592 futurelargelinkrevs.remove(linkrev)
1645 futurelargelinkrevs.remove(linkrev)
1593
1646
1594 for p in self.parentrevs(strippoint):
1647 for p in self.parentrevs(strippoint):
1595 if p != nullrev:
1648 if p != nullrev:
1596 plinkrev = self.linkrev(p)
1649 plinkrev = self.linkrev(p)
1597 heads[p] = plinkrev
1650 heads[p] = plinkrev
1598 if plinkrev >= minlink:
1651 if plinkrev >= minlink:
1599 futurelargelinkrevs.add(plinkrev)
1652 futurelargelinkrevs.add(plinkrev)
1600
1653
1601 return strippoint, brokenrevs
1654 return strippoint, brokenrevs
1602
1655
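# Example (hypothetical filelog): revisions 0..3 where revs 1 and 2
# both descend from rev 0, rev 3 descends from rev 1, and the linkrevs
# are [0, 1, 4, 3]. getstrippoint(4) returns (2, set([3])): rev 2 is
# the lowest revision whose linkrev is >= 4, and stripping from there
# also removes rev 3, whose linkrev 3 is below minlink and is therefore
# reported as broken.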
1603 def strip(self, minlink, transaction):
1656 def strip(self, minlink, transaction):
1604 """truncate the revlog on the first revision with a linkrev >= minlink
1657 """truncate the revlog on the first revision with a linkrev >= minlink
1605
1658
1606 This function is called when we're stripping revision minlink and
1659 This function is called when we're stripping revision minlink and
1607 its descendants from the repository.
1660 its descendants from the repository.
1608
1661
1609 We have to remove all revisions with linkrev >= minlink, because
1662 We have to remove all revisions with linkrev >= minlink, because
1610 the equivalent changelog revisions will be renumbered after the
1663 the equivalent changelog revisions will be renumbered after the
1611 strip.
1664 strip.
1612
1665
1613 So we truncate the revlog on the first of these revisions, and
1666 So we truncate the revlog on the first of these revisions, and
1614 trust that the caller has saved the revisions that shouldn't be
1667 trust that the caller has saved the revisions that shouldn't be
1615 removed and that it'll re-add them after this truncation.
1668 removed and that it'll re-add them after this truncation.
1616 """
1669 """
1617 if len(self) == 0:
1670 if len(self) == 0:
1618 return
1671 return
1619
1672
1620 rev, _ = self.getstrippoint(minlink)
1673 rev, _ = self.getstrippoint(minlink)
1621 if rev == len(self):
1674 if rev == len(self):
1622 return
1675 return
1623
1676
1624 # first truncate the files on disk
1677 # first truncate the files on disk
1625 end = self.start(rev)
1678 end = self.start(rev)
1626 if not self._inline:
1679 if not self._inline:
1627 transaction.add(self.datafile, end)
1680 transaction.add(self.datafile, end)
1628 end = rev * self._io.size
1681 end = rev * self._io.size
1629 else:
1682 else:
1630 end += rev * self._io.size
1683 end += rev * self._io.size
1631
1684
1632 transaction.add(self.indexfile, end)
1685 transaction.add(self.indexfile, end)
1633
1686
1634 # then reset internal state in memory to forget those revisions
1687 # then reset internal state in memory to forget those revisions
1635 self._cache = None
1688 self._cache = None
1636 self._chaininfocache = {}
1689 self._chaininfocache = {}
1637 self._chunkclear()
1690 self._chunkclear()
1638 for x in xrange(rev, len(self)):
1691 for x in xrange(rev, len(self)):
1639 del self.nodemap[self.node(x)]
1692 del self.nodemap[self.node(x)]
1640
1693
1641 del self.index[rev:-1]
1694 del self.index[rev:-1]
1642
1695
1643 def checksize(self):
1696 def checksize(self):
1644 expected = 0
1697 expected = 0
1645 if len(self):
1698 if len(self):
1646 expected = max(0, self.end(len(self) - 1))
1699 expected = max(0, self.end(len(self) - 1))
1647
1700
1648 try:
1701 try:
1649 f = self.opener(self.datafile)
1702 f = self.opener(self.datafile)
1650 f.seek(0, 2)
1703 f.seek(0, 2)
1651 actual = f.tell()
1704 actual = f.tell()
1652 f.close()
1705 f.close()
1653 dd = actual - expected
1706 dd = actual - expected
1654 except IOError as inst:
1707 except IOError as inst:
1655 if inst.errno != errno.ENOENT:
1708 if inst.errno != errno.ENOENT:
1656 raise
1709 raise
1657 dd = 0
1710 dd = 0
1658
1711
1659 try:
1712 try:
1660 f = self.opener(self.indexfile)
1713 f = self.opener(self.indexfile)
1661 f.seek(0, 2)
1714 f.seek(0, 2)
1662 actual = f.tell()
1715 actual = f.tell()
1663 f.close()
1716 f.close()
1664 s = self._io.size
1717 s = self._io.size
1665 i = max(0, actual // s)
1718 i = max(0, actual // s)
1666 di = actual - (i * s)
1719 di = actual - (i * s)
1667 if self._inline:
1720 if self._inline:
1668 databytes = 0
1721 databytes = 0
1669 for r in self:
1722 for r in self:
1670 databytes += max(0, self.length(r))
1723 databytes += max(0, self.length(r))
1671 dd = 0
1724 dd = 0
1672 di = actual - len(self) * s - databytes
1725 di = actual - len(self) * s - databytes
1673 except IOError as inst:
1726 except IOError as inst:
1674 if inst.errno != errno.ENOENT:
1727 if inst.errno != errno.ENOENT:
1675 raise
1728 raise
1676 di = 0
1729 di = 0
1677
1730
1678 return (dd, di)
1731 return (dd, di)
1679
1732
1680 def files(self):
1733 def files(self):
1681 res = [self.indexfile]
1734 res = [self.indexfile]
1682 if not self._inline:
1735 if not self._inline:
1683 res.append(self.datafile)
1736 res.append(self.datafile)
1684 return res
1737 return res