revlog: make revlogv0 loading more robust against corruption
Matt Mackall
r5544:686899a7 default
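The change makes revlogoldio.parseindex tolerant of corrupt v0 indexes: a parent hash that is missing from the nodemap (for instance because the index file is damaged or truncated) used to raise an uncaught KeyError while the index was being parsed; with dict.get the lookup now falls back to nullrev, so the revlog loads and any damage can surface later as an integrity error rather than a crash. A minimal sketch of the difference (illustrative; nullrev is -1):

    nodemap = {nullid: nullrev}
    p = '\x01' * 20               # parent hash absent from the map
    nodemap[p]                    # old code: raises KeyError
    nodemap.get(p, nullrev)       # new code: degrades to nullrev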
@@ -1,1305 +1,1305 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import binascii, changegroup, errno, ancestor, mdiff, os
15 import binascii, changegroup, errno, ancestor, mdiff, os
16 import sha, struct, util, zlib
16 import sha, struct, util, zlib
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = sha.new
22 _sha = sha.new
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 class RevlogError(Exception):
32 class RevlogError(Exception):
33 pass
33 pass
34 class LookupError(RevlogError):
34 class LookupError(RevlogError):
35 pass
35 pass
36
36
37 def getoffset(q):
37 def getoffset(q):
38 return int(q >> 16)
38 return int(q >> 16)
39
39
40 def gettype(q):
40 def gettype(q):
41 return int(q & 0xFFFF)
41 return int(q & 0xFFFF)
42
42
43 def offset_type(offset, type):
43 def offset_type(offset, type):
44 return long(long(offset) << 16 | type)
44 return long(long(offset) << 16 | type)
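# Example (illustrative): offset_type packs a 48-bit offset and a 16-bit
# type/flags field into a single integer, and getoffset/gettype undo it:
#   offset_type(0x10, 2) == 0x100002
#   getoffset(0x100002) == 0x10, gettype(0x100002) == 2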

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()
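# Example (illustrative): a root revision has two null parents, so its
# node id is hash(text, nullid, nullid), i.e. the sha1 of nullid + nullid
# followed by the text.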

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
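# Illustrative round-trip (assuming the caller stores the two parts of
# compress() concatenated, as the revlog does when writing chunks):
#   h, d = compress(text)
#   decompress(h + d) == text
# This holds for all three encodings: zlib data (leading 'x'), raw text
# stored as-is because it begins with '\0', and literal text after 'u'.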

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
-                  nodemap[e[4]], nodemap[e[5]], e[6])
+                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
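# Layout note (illustrative, derived from the accessors below): each parsed
# in-memory index entry is the 8-tuple
#   (offset_flags, compressed_len, uncompressed_len, base_rev,
#    link_rev, parent1_rev, parent2_rev, nodeid)
# revlogoldio.parseindex builds this shape from v0 records, and
# indexformatng stores it directly.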

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if lazyparser.safe_to_use and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        s = self.size
        cache = None
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        # if we're not using lazymap, always read the whole index
        data = fp.read()
        l = len(data) - s
        append = index.append
        if inline:
            cache = (0, data)
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                if e[1] < 0:
                    break
                off += e[1] + s
        else:
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                off += s

        e = list(index[0])
        type = gettype(e[0])
        e[0] = offset_type(0, type)
        index[0] = e

        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1
        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def count(self):
        return len(self.index) - 1

    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, node):
        return self.index[self.rev(node)][4]
    def parents(self, node):
        d = self.index[self.rev(node)][5:7]
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation. The advantage of this code is that it
        # will be faster for a single revision. But the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned. Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter.)
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = self.count()
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = self.count() + rev
            if rev < 0 or rev >= self.count():
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))

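    # Illustrative (sketch): lookup() above accepts several spellings for
    # the same revision, e.g. for a revlog r with a revision numbered 5:
    #   r.lookup(5); r.lookup("5"); r.lookup(hex(r.node(5)))
    #   r.lookup(hex(r.node(5))[:4])   # unambiguous hex prefix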
860 def cmp(self, node, text):
860 def cmp(self, node, text):
861 """compare text with a given file revision"""
861 """compare text with a given file revision"""
862 p1, p2 = self.parents(node)
862 p1, p2 = self.parents(node)
863 return hash(text, p1, p2) != node
863 return hash(text, p1, p2) != node
864
864
865 def chunk(self, rev, df=None):
865 def chunk(self, rev, df=None):
866 def loadcache(df):
866 def loadcache(df):
867 if not df:
867 if not df:
868 if self._inline:
868 if self._inline:
869 df = self.opener(self.indexfile)
869 df = self.opener(self.indexfile)
870 else:
870 else:
871 df = self.opener(self.datafile)
871 df = self.opener(self.datafile)
872 df.seek(start)
872 df.seek(start)
873 self._chunkcache = (start, df.read(cache_length))
873 self._chunkcache = (start, df.read(cache_length))
874
874
875 start, length = self.start(rev), self.length(rev)
875 start, length = self.start(rev), self.length(rev)
876 if self._inline:
876 if self._inline:
877 start += (rev + 1) * self._io.size
877 start += (rev + 1) * self._io.size
878 end = start + length
878 end = start + length
879
879
880 offset = 0
880 offset = 0
881 if not self._chunkcache:
881 if not self._chunkcache:
882 cache_length = max(65536, length)
882 cache_length = max(65536, length)
883 loadcache(df)
883 loadcache(df)
884 else:
884 else:
885 cache_start = self._chunkcache[0]
885 cache_start = self._chunkcache[0]
886 cache_length = len(self._chunkcache[1])
886 cache_length = len(self._chunkcache[1])
887 cache_end = cache_start + cache_length
887 cache_end = cache_start + cache_length
888 if start >= cache_start and end <= cache_end:
888 if start >= cache_start and end <= cache_end:
889 # it is cached
889 # it is cached
890 offset = start - cache_start
890 offset = start - cache_start
891 else:
891 else:
892 cache_length = max(65536, length)
892 cache_length = max(65536, length)
893 loadcache(df)
893 loadcache(df)
894
894
895 # avoid copying large chunks
895 # avoid copying large chunks
896 c = self._chunkcache[1]
896 c = self._chunkcache[1]
897 if cache_length != length:
897 if cache_length != length:
898 c = c[offset:offset + length]
898 c = c[offset:offset + length]
899
899
900 return decompress(c)
900 return decompress(c)
901
901
902 def delta(self, node):
902 def delta(self, node):
903 """return or calculate a delta between a node and its predecessor"""
903 """return or calculate a delta between a node and its predecessor"""
904 r = self.rev(node)
904 r = self.rev(node)
905 return self.revdiff(r - 1, r)
905 return self.revdiff(r - 1, r)
906
906
907 def revdiff(self, rev1, rev2):
907 def revdiff(self, rev1, rev2):
908 """return or calculate a delta between two revisions"""
908 """return or calculate a delta between two revisions"""
909 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
909 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
910 return self.chunk(rev2)
910 return self.chunk(rev2)
911
911
912 return mdiff.textdiff(self.revision(self.node(rev1)),
912 return mdiff.textdiff(self.revision(self.node(rev1)),
913 self.revision(self.node(rev2)))
913 self.revision(self.node(rev2)))
914
914
915 def revision(self, node):
915 def revision(self, node):
916 """return an uncompressed revision of a given"""
916 """return an uncompressed revision of a given"""
917 if node == nullid:
917 if node == nullid:
918 return ""
918 return ""
919 if self._cache and self._cache[0] == node:
919 if self._cache and self._cache[0] == node:
920 return str(self._cache[2])
920 return str(self._cache[2])
921
921
922 # look up what we need to read
922 # look up what we need to read
923 text = None
923 text = None
924 rev = self.rev(node)
924 rev = self.rev(node)
925 base = self.base(rev)
925 base = self.base(rev)
926
926
927 # check rev flags
927 # check rev flags
928 if self.index[rev][0] & 0xFFFF:
928 if self.index[rev][0] & 0xFFFF:
929 raise RevlogError(_('incompatible revision flag %x') %
929 raise RevlogError(_('incompatible revision flag %x') %
930 (self.index[rev][0] & 0xFFFF))
930 (self.index[rev][0] & 0xFFFF))
931
931
932 if self._inline:
932 if self._inline:
933 # we probably have the whole chunk cached
933 # we probably have the whole chunk cached
934 df = None
934 df = None
935 else:
935 else:
936 df = self.opener(self.datafile)
936 df = self.opener(self.datafile)
937
937
938 # do we have useful data cached?
938 # do we have useful data cached?
939 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
939 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
940 base = self._cache[1]
940 base = self._cache[1]
941 text = str(self._cache[2])
941 text = str(self._cache[2])
942 self._loadindex(base, rev + 1)
942 self._loadindex(base, rev + 1)
943 else:
943 else:
944 self._loadindex(base, rev + 1)
944 self._loadindex(base, rev + 1)
945 text = self.chunk(base, df=df)
945 text = self.chunk(base, df=df)
946
946
947 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
947 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
948 text = mdiff.patches(text, bins)
948 text = mdiff.patches(text, bins)
949 p1, p2 = self.parents(node)
949 p1, p2 = self.parents(node)
950 if node != hash(text, p1, p2):
950 if node != hash(text, p1, p2):
951 raise RevlogError(_("integrity check failed on %s:%d")
951 raise RevlogError(_("integrity check failed on %s:%d")
952 % (self.datafile, rev))
952 % (self.datafile, rev))
953
953
954 self._cache = (node, rev, text)
954 self._cache = (node, rev, text)
955 return text
955 return text
956
956
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

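    # Worked example for the split above, with hypothetical numbers: the
    # stored offsets ignore interleaved index entries, so revision r's
    # data in the inline file sits at self.start(r) + (r + 1) * calc,
    # i.e. past the r + 1 index entries written so far plus all earlier
    # data.  With calc = 64 and three 100-byte revisions, rev 2 starts at
    # 200 + 3 * 64 = 392 bytes into the .i file; after the copy it starts
    # at plain self.start(2) = 200 in the new .d file.
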
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

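    # Usage sketch ("rl", "tr", and "linkrev" are hypothetical):
    #
    #   node = rl.addrevision("new file contents", tr, linkrev, p1, p2)
    #
    # Passing a precomputed delta d skips rediffing against the previous
    # revision when the caller already has one, as addgroup() below does.
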
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

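    # The snapshot heuristic above with concrete, hypothetical numbers:
    # if the new text is 1,000 bytes but the bytes spanned from the delta
    # base through this delta (dist) come to 2,500, then
    # dist > len(text) * 2 holds and a fresh full version is stored,
    # bounding the cost of any future reconstruction at roughly twice
    # the text size.
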
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

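    # Usage sketch (a and b are hypothetical node ids in a revlog "rl"):
    #
    #   anc = rl.ancestor(a, b)
    #   if anc == nullid:
    #       pass    # the two nodes share no ancestor in this revlog
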
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The parent used is parents[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

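    # Shape of one emitted chunk, as a sketch of the stream layout above:
    #
    #   +--------+-------------------------------+-----------------------+
    #   | length | meta: node, p1, p2, linknode  | delta payload         |
    #   | header | (4 x 20 bytes)                | (mdiff binary format) |
    #   +--------+-------------------------------+-----------------------+
    #
    # Deltas over 1MB (2**20) are sliced into 256KB (2**18) pieces purely
    # to bound the size of each yielded string; receivers reassemble them.
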
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = buffer(chunk, 80)
            del chunk

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)
                cdeltalen = len(cdelta[0]) + len(cdelta[1])
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + cdeltalen) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                if len(text) == 0:
                    # skip over trivial delta header
                    text = buffer(delta, 12)
                else:
                    text = mdiff.patches(text, [delta])
                del delta
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), cdeltalen, textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta[0])
                    ifh.write(cdelta[1])
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta[0])
                    dfh.write(cdelta[1])
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

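    # Usage sketch ("rl", "chunks", "tr", and the changelog "cl" are all
    # hypothetical): addgroup() consumes the stream group() produces on
    # the sending side, mapping each changeset node to a local linkrev:
    #
    #   def linkmapper(cs):
    #       return cl.rev(cs)
    #
    #   tip = rl.addgroup(chunks, linkmapper, tr)
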
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

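    # Truncation arithmetic above, with hypothetical numbers: stripping
    # from rev = 10 in a non-inline revlog with 64-byte index entries
    # truncates the data file at self.start(10) and the index file at
    # 10 * 64 = 640 bytes.  In the inline case index and data share one
    # file, so the single cut point is self.start(10) + 10 * 64.
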
    def checksize(self):
        expected = 0
        if self.count():
            expected = max(0, self.end(self.count() - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

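    # Interpreting the result ("rl" is a hypothetical revlog instance):
    #
    #   dd, di = rl.checksize()
    #   # dd: surplus bytes in the data file beyond what the index expects
    #   # di: surplus or misaligned bytes in the index file itself
    #   # (0, 0) means both files match the recorded revisions exactly.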