fix UnboundLocalError, refactor a bit...
Benoit Boissinot - r5453:9d77f2b4 default
@@ -1,1306 +1,1305 @@
"""
revlog.py - storage back-end for mercurial

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches

Copyright 2005-2007 Matt Mackall <mpm@selenic.com>

This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
"""

from node import *
from i18n import _
import binascii, changegroup, errno, ancestor, mdiff, os
import sha, struct, util, zlib

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = sha.new

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

class RevlogError(Exception):
    pass
class LookupError(RevlogError):
    pass

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

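offset_type packs a 48-bit file offset and 16 bits of type/flag information
into one integer; getoffset and gettype split it back apart. A minimal
round-trip sketch (the values are illustrative):

    q = offset_type(4096, 0)    # offset 4096, no flag bits
    assert getoffset(q) == 4096
    assert gettype(q) == 0
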
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

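So a nodeid is the SHA-1 of the two parent nodeids (in sorted order)
followed by the revision text. A minimal sketch with placeholder parents:

    p1, p2 = "\0" * 20, "\xff" * 20       # made-up 20-byte parent ids
    node = hash("file contents", p1, p2)
    assert len(node) == 20                # a binary SHA-1 digest
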
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
+   bin = None
    if l < 44:
-       if text[0] == '\0':
-           return ("", text)
-       return ('u', text)
+       pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
-   if len(bin) > l:
+   if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

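The two changes above are the UnboundLocalError fix named in the commit
message: previously, when l > 1000000 and the piecewise zlib output was not
smaller than the input, bin was never assigned before len(bin) was evaluated.
Initializing bin = None and testing bin is None or len(bin) > l routes every
incompressible text through the marked-uncompressed path. A round-trip sketch
of the storage markers (the inputs are illustrative):

    # 'u' marks text kept uncompressed; zlib output begins with 'x';
    # text starting with '\0' needs no marker at all.
    for text in ("short", "x" * 100000):
        marker, data = compress(text)
        assert decompress(marker + data) == text
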
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions are not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap[e[4]], nodemap[e[5]], e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

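Each revlogng index record is therefore 64 bytes (struct.calcsize of
indexformatng), with the 20-byte nodeid starting at byte 32 (ngshaoffset) and
12 bytes of zero padding rounding the nodeid field out to the 32 bytes noted
above. A packing sketch with illustrative field values:

    entry = (offset_type(0, 0), 11, 24, 0, 0, -1, -1, "\0" * 20)
    rec = _pack(indexformatng, *entry)
    assert len(rec) == struct.calcsize(indexformatng) == 64
    assert _unpack(indexformatng, rec) == entry
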
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if lazyparser.safe_to_use and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        s = self.size
        cache = None
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        # if we're not using lazymap, always read the whole index
        data = fp.read()
        l = len(data) - s
        append = index.append
        if inline:
            cache = (0, data)
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                if e[1] < 0:
                    break
                off += e[1] + s
        else:
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                off += s

        e = list(index[0])
        type = gettype(e[0])
        e[0] = offset_type(0, type)
        index[0] = e

        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1
        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def count(self):
        return len(self.index) - 1

    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, node):
        return self.index[self.rev(node)][4]
    def parents(self, node):
        d = self.index[self.rev(node)][5:7]
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation, The advantage to this code is it
        # will be faster for a single revision. But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

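reachable is a breadth-first walk from node toward the roots; the returned
dict is used as a set. A usage sketch against a hypothetical revlog rl:

    ancs = rl.reachable(rl.tip())   # tip plus all of its ancestors
    assert rl.tip() in ancs
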
    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned. Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = self.count()
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = self.count() + rev
            if rev < 0 or rev >= self.count():
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))

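lookup thus accepts several spellings of the same revision. A sketch against
a hypothetical revlog rl holding a binary nodeid node:

    rl.lookup(0)                 # revision number
    rl.lookup("0")               # str(revision number)
    rl.lookup(node)              # 20-byte binary nodeid
    rl.lookup(hex(node))         # full 40-digit hex nodeid
    rl.lookup(hex(node)[:8])     # unambiguous hex prefix, via _partialmatch
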
    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def chunk(self, rev, df=None):
        def loadcache(df):
            if not df:
                if self._inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self._chunkcache = (start, df.read(cache_length))

        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        end = start + length

        offset = 0
        if not self._chunkcache:
            cache_length = max(65536, length)
            loadcache(df)
        else:
            cache_start = self._chunkcache[0]
            cache_length = len(self._chunkcache[1])
            cache_end = cache_start + cache_length
            if start >= cache_start and end <= cache_end:
                # it is cached
                offset = start - cache_start
            else:
                cache_length = max(65536, length)
                loadcache(df)

        # avoid copying large chunks
        c = self._chunkcache[1]
        if cache_length != length:
            c = c[offset:offset + length]

        return decompress(c)

    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        if self._inline:
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
        else:
            self._loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

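revision() is the delta-chain walk promised in the class docstring: fetch the
full text stored at base(rev), then patch forward through every stored delta
up to rev. A schematic equivalent, ignoring the cache and flag checks (rl is
a hypothetical revlog):

    text = rl.chunk(rl.base(rev))
    bins = [rl.chunk(r) for r in xrange(rl.base(rev) + 1, rev + 1)]
    text = mdiff.patches(text, bins)
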
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072: # revlogs under 128 KiB stay inline
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

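    # Layout note (illustrative): in an inline revlog, index entries and data
    # chunks are interleaved in the .i file, so the data for revision r sits
    # at self.start(r) shifted by the (r + 1) index entries written before
    # it. With a hypothetical 64-byte entry size:
    #
    #   calc = 64
    #   start = self.start(2) + (2 + 1) * calc   # data of rev 2: skip entries 0-2
    #
    # which is exactly where the migration loop above seeks before copying
    # each chunk out into the new standalone .d file.
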
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

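    # Note (illustrative): addrevision() only opens the appropriate file
    # handles -- no separate data file when the revlog is inline -- and
    # delegates to _addrevision(). Bulk writers such as addgroup() below
    # call _addrevision() directly so they can keep their handles open
    # across many revisions.
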
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

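    # Heuristic example (illustrative): dist counts the bytes needed to
    # reconstruct the new revision -- the whole chain since the current base
    # plus the new compressed delta. If the full text is 1000 bytes, a chain
    # of 1900 bytes plus a 200-byte delta gives dist = 2100 > 2 * 1000, so
    # the revision is stored in full and becomes the new base; otherwise
    # only the compressed delta is appended to the chain.
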
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

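    # Sketch (illustrative): ancestor.ancestor() only needs a callback that
    # maps a revision number to its parent revisions (with nullrev filtered
    # out), so any DAG expressed that way works:
    #
    #   dag = {0: [], 1: [0], 2: [0], 3: [1, 2]}
    #   c = ancestor.ancestor(1, 2, lambda rev: dag[rev])   # -> 0
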
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The parent used is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2 ** 20:
                # emit very large deltas in 256 KiB slices
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

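    # Wire format (illustrative): each chunk yielded above is a
    # length-prefixed record whose payload starts with 80 bytes of metadata
    # followed by the delta, and the stream ends with an empty "close" chunk:
    #
    #   | length | node (20) | p1 (20) | p2 (20) | link (20) | delta ... |
    #
    # The four 20-byte fields are binary hashes, matching the
    # "20s20s20s20s" unpack performed by addgroup() below.
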
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = buffer(chunk, 80)
            del chunk

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)
                cdeltalen = len(cdelta[0]) + len(cdelta[1])
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + cdeltalen) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                if len(text) == 0:
                    # skip over trivial delta header
                    text = buffer(delta, 12)
                else:
                    text = mdiff.patches(text, [delta])
                del delta
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional;
                    # reopen the data and index files
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), cdeltalen, textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta[0])
                    ifh.write(cdelta[1])
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        # checkinlinesize migrated us off inline storage
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta[0])
                    dfh.write(cdelta[1])
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

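    # Path summary (illustrative): for each incoming chunk, if the delta's
    # base ('chain') is the revision just appended ('prev') and the chain is
    # still short enough, the compressed delta is written straight to disk;
    # otherwise the full text is rebuilt and routed through _addrevision(),
    # which re-anchors the delta chain:
    #
    #   if chain == prev and short_enough:   # 'short_enough' is shorthand
    #       write(compress(delta))           # cheap append
    #   else:
    #       text = mdiff.patches(self.revision(chain), [delta])
    #       self._addrevision(text, ...)     # may store a new full version
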
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

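    # Truncation example (illustrative): stripping from revision rev cuts
    # the data file at self.start(rev) and the index at rev * self._io.size;
    # an inline revlog keeps both in the .i file, so the two sizes are
    # summed. With a hypothetical 64-byte entry size and self.start(5) == 4096:
    #
    #   separate files:  .d truncated at 4096, .i truncated at 5 * 64
    #   inline revlog:   .i truncated at 4096 + 5 * 64
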
    def checksize(self):
        expected = 0
        if self.count():
            expected = max(0, self.end(self.count() - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
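
    # Interpretation (illustrative): checksize() returns (dd, di), the size
    # discrepancies of the data and index files against what the last index
    # entry predicts. (0, 0) means both files end exactly where expected; a
    # positive value indicates trailing garbage (say, from an interrupted
    # write), and a negative one indicates truncation.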