revlog: add punched revision flag...

Vishakh H
r11745:138c055e default
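The change itself is two lines: it introduces REVIDX_PUNCHED_FLAG and a REVIDX_KNOWN_FLAGS mask next to the existing revlog version constants (see the marked lines in the diff below). The flag name suggests a placeholder ("punched") revision whose data is not actually stored, presumably groundwork for shallow-clone style repositories, but this commit only defines the constants. Per-revision flags live in the low 16 bits of the first index field, so the new bit is visible through the existing flags() accessor. A minimal sketch, assuming a revlog instance rl and a revision number rev (both hypothetical):

    # hypothetical usage; rl is a revlog, rev a revision number
    if rl.flags(rev) & REVIDX_PUNCHED_FLAG:
        pass  # rev is marked as punched
    # bits outside REVIDX_KNOWN_FLAGS would be flags this code cannot handle
    unknown = rl.flags(rev) & ~REVIDX_KNOWN_FLAGS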
@@ -1,1400 +1,1405
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog flags
26 # revlog flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33 REVIDX_PUNCHED_FLAG = 2
34 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG
33
35
34 # amount of data read unconditionally, should be >= 4
36 # amount of data read unconditionally, should be >= 4
35 # when not inline: threshold for using lazy index
37 # when not inline: threshold for using lazy index
36 _prereadsize = 1048576
38 _prereadsize = 1048576
37 # max size of revlog with inline data
39 # max size of revlog with inline data
38 _maxinline = 131072
40 _maxinline = 131072
39
41
40 RevlogError = error.RevlogError
42 RevlogError = error.RevlogError
41 LookupError = error.LookupError
43 LookupError = error.LookupError
42
44
43 def getoffset(q):
45 def getoffset(q):
44 return int(q >> 16)
46 return int(q >> 16)
45
47
46 def gettype(q):
48 def gettype(q):
47 return int(q & 0xFFFF)
49 return int(q & 0xFFFF)
48
50
49 def offset_type(offset, type):
51 def offset_type(offset, type):
50 return long(long(offset) << 16 | type)
52 return long(long(offset) << 16 | type)
51
53
52 nullhash = _sha(nullid)
54 nullhash = _sha(nullid)
53
55
54 def hash(text, p1, p2):
56 def hash(text, p1, p2):
55 """generate a hash from the given text and its parent hashes
57 """generate a hash from the given text and its parent hashes
56
58
57 This hash combines both the current file contents and its history
59 This hash combines both the current file contents and its history
58 in a manner that makes it easy to distinguish nodes with the same
60 in a manner that makes it easy to distinguish nodes with the same
59 content in the revision graph.
61 content in the revision graph.
60 """
62 """
61 # As of now, if one of the parent node is null, p2 is null
63 # As of now, if one of the parent node is null, p2 is null
62 if p2 == nullid:
64 if p2 == nullid:
63 # deep copy of a hash is faster than creating one
65 # deep copy of a hash is faster than creating one
64 s = nullhash.copy()
66 s = nullhash.copy()
65 s.update(p1)
67 s.update(p1)
66 else:
68 else:
67 # none of the parent nodes are nullid
69 # none of the parent nodes are nullid
68 l = [p1, p2]
70 l = [p1, p2]
69 l.sort()
71 l.sort()
70 s = _sha(l[0])
72 s = _sha(l[0])
71 s.update(l[1])
73 s.update(l[1])
72 s.update(text)
74 s.update(text)
73 return s.digest()
75 return s.digest()
74
76
75 def compress(text):
77 def compress(text):
76 """ generate a possibly-compressed representation of text """
78 """ generate a possibly-compressed representation of text """
77 if not text:
79 if not text:
78 return ("", text)
80 return ("", text)
79 l = len(text)
81 l = len(text)
80 bin = None
82 bin = None
81 if l < 44:
83 if l < 44:
82 pass
84 pass
83 elif l > 1000000:
85 elif l > 1000000:
84 # zlib makes an internal copy, thus doubling memory usage for
86 # zlib makes an internal copy, thus doubling memory usage for
85 # large files, so lets do this in pieces
87 # large files, so lets do this in pieces
86 z = zlib.compressobj()
88 z = zlib.compressobj()
87 p = []
89 p = []
88 pos = 0
90 pos = 0
89 while pos < l:
91 while pos < l:
90 pos2 = pos + 2**20
92 pos2 = pos + 2**20
91 p.append(z.compress(text[pos:pos2]))
93 p.append(z.compress(text[pos:pos2]))
92 pos = pos2
94 pos = pos2
93 p.append(z.flush())
95 p.append(z.flush())
94 if sum(map(len, p)) < l:
96 if sum(map(len, p)) < l:
95 bin = "".join(p)
97 bin = "".join(p)
96 else:
98 else:
97 bin = _compress(text)
99 bin = _compress(text)
98 if bin is None or len(bin) > l:
100 if bin is None or len(bin) > l:
99 if text[0] == '\0':
101 if text[0] == '\0':
100 return ("", text)
102 return ("", text)
101 return ('u', text)
103 return ('u', text)
102 return ("", bin)
104 return ("", bin)
103
105
104 def decompress(bin):
106 def decompress(bin):
105 """ decompress the given input """
107 """ decompress the given input """
106 if not bin:
108 if not bin:
107 return bin
109 return bin
108 t = bin[0]
110 t = bin[0]
109 if t == '\0':
111 if t == '\0':
110 return bin
112 return bin
111 if t == 'x':
113 if t == 'x':
112 return _decompress(bin)
114 return _decompress(bin)
113 if t == 'u':
115 if t == 'u':
114 return bin[1:]
116 return bin[1:]
115 raise RevlogError(_("unknown compression type %r") % t)
117 raise RevlogError(_("unknown compression type %r") % t)
116
118
117 class lazyparser(object):
119 class lazyparser(object):
118 """
120 """
119 this class avoids the need to parse the entirety of large indices
121 this class avoids the need to parse the entirety of large indices
120 """
122 """
121
123
122 # lazyparser is not safe to use on windows if win32 extensions not
124 # lazyparser is not safe to use on windows if win32 extensions not
123 # available. it keeps file handle open, which make it not possible
125 # available. it keeps file handle open, which make it not possible
124 # to break hardlinks on local cloned repos.
126 # to break hardlinks on local cloned repos.
125
127
126 def __init__(self, dataf):
128 def __init__(self, dataf):
127 try:
129 try:
128 size = util.fstat(dataf).st_size
130 size = util.fstat(dataf).st_size
129 except AttributeError:
131 except AttributeError:
130 size = 0
132 size = 0
131 self.dataf = dataf
133 self.dataf = dataf
132 self.s = struct.calcsize(indexformatng)
134 self.s = struct.calcsize(indexformatng)
133 self.datasize = size
135 self.datasize = size
134 self.l = size // self.s
136 self.l = size // self.s
135 self.index = [None] * self.l
137 self.index = [None] * self.l
136 self.map = {nullid: nullrev}
138 self.map = {nullid: nullrev}
137 self.allmap = 0
139 self.allmap = 0
138 self.all = 0
140 self.all = 0
139 self.mapfind_count = 0
141 self.mapfind_count = 0
140
142
141 def loadmap(self):
143 def loadmap(self):
142 """
144 """
143 during a commit, we need to make sure the rev being added is
145 during a commit, we need to make sure the rev being added is
144 not a duplicate. This requires loading the entire index,
146 not a duplicate. This requires loading the entire index,
145 which is fairly slow. loadmap can load up just the node map,
147 which is fairly slow. loadmap can load up just the node map,
146 which takes much less time.
148 which takes much less time.
147 """
149 """
148 if self.allmap:
150 if self.allmap:
149 return
151 return
150 end = self.datasize
152 end = self.datasize
151 self.allmap = 1
153 self.allmap = 1
152 cur = 0
154 cur = 0
153 count = 0
155 count = 0
154 blocksize = self.s * 256
156 blocksize = self.s * 256
155 self.dataf.seek(0)
157 self.dataf.seek(0)
156 while cur < end:
158 while cur < end:
157 data = self.dataf.read(blocksize)
159 data = self.dataf.read(blocksize)
158 off = 0
160 off = 0
159 for x in xrange(256):
161 for x in xrange(256):
160 n = data[off + ngshaoffset:off + ngshaoffset + 20]
162 n = data[off + ngshaoffset:off + ngshaoffset + 20]
161 self.map[n] = count
163 self.map[n] = count
162 count += 1
164 count += 1
163 if count >= self.l:
165 if count >= self.l:
164 break
166 break
165 off += self.s
167 off += self.s
166 cur += blocksize
168 cur += blocksize
167
169
168 def loadblock(self, blockstart, blocksize, data=None):
170 def loadblock(self, blockstart, blocksize, data=None):
169 if self.all:
171 if self.all:
170 return
172 return
171 if data is None:
173 if data is None:
172 self.dataf.seek(blockstart)
174 self.dataf.seek(blockstart)
173 if blockstart + blocksize > self.datasize:
175 if blockstart + blocksize > self.datasize:
174 # the revlog may have grown since we've started running,
176 # the revlog may have grown since we've started running,
175 # but we don't have space in self.index for more entries.
177 # but we don't have space in self.index for more entries.
176 # limit blocksize so that we don't get too much data.
178 # limit blocksize so that we don't get too much data.
177 blocksize = max(self.datasize - blockstart, 0)
179 blocksize = max(self.datasize - blockstart, 0)
178 data = self.dataf.read(blocksize)
180 data = self.dataf.read(blocksize)
179 lend = len(data) // self.s
181 lend = len(data) // self.s
180 i = blockstart // self.s
182 i = blockstart // self.s
181 off = 0
183 off = 0
182 # lazyindex supports __delitem__
184 # lazyindex supports __delitem__
183 if lend > len(self.index) - i:
185 if lend > len(self.index) - i:
184 lend = len(self.index) - i
186 lend = len(self.index) - i
185 for x in xrange(lend):
187 for x in xrange(lend):
186 if self.index[i + x] is None:
188 if self.index[i + x] is None:
187 b = data[off : off + self.s]
189 b = data[off : off + self.s]
188 self.index[i + x] = b
190 self.index[i + x] = b
189 n = b[ngshaoffset:ngshaoffset + 20]
191 n = b[ngshaoffset:ngshaoffset + 20]
190 self.map[n] = i + x
192 self.map[n] = i + x
191 off += self.s
193 off += self.s
192
194
193 def findnode(self, node):
195 def findnode(self, node):
194 """search backwards through the index file for a specific node"""
196 """search backwards through the index file for a specific node"""
195 if self.allmap:
197 if self.allmap:
196 return None
198 return None
197
199
198 # hg log will cause many many searches for the manifest
200 # hg log will cause many many searches for the manifest
199 # nodes. After we get called a few times, just load the whole
201 # nodes. After we get called a few times, just load the whole
200 # thing.
202 # thing.
201 if self.mapfind_count > 8:
203 if self.mapfind_count > 8:
202 self.loadmap()
204 self.loadmap()
203 if node in self.map:
205 if node in self.map:
204 return node
206 return node
205 return None
207 return None
206 self.mapfind_count += 1
208 self.mapfind_count += 1
207 last = self.l - 1
209 last = self.l - 1
208 while self.index[last] != None:
210 while self.index[last] != None:
209 if last == 0:
211 if last == 0:
210 self.all = 1
212 self.all = 1
211 self.allmap = 1
213 self.allmap = 1
212 return None
214 return None
213 last -= 1
215 last -= 1
214 end = (last + 1) * self.s
216 end = (last + 1) * self.s
215 blocksize = self.s * 256
217 blocksize = self.s * 256
216 while end >= 0:
218 while end >= 0:
217 start = max(end - blocksize, 0)
219 start = max(end - blocksize, 0)
218 self.dataf.seek(start)
220 self.dataf.seek(start)
219 data = self.dataf.read(end - start)
221 data = self.dataf.read(end - start)
220 findend = end - start
222 findend = end - start
221 while True:
223 while True:
222 # we're searching backwards, so we have to make sure
224 # we're searching backwards, so we have to make sure
223 # we don't find a changeset where this node is a parent
225 # we don't find a changeset where this node is a parent
224 off = data.find(node, 0, findend)
226 off = data.find(node, 0, findend)
225 findend = off
227 findend = off
226 if off >= 0:
228 if off >= 0:
227 i = off / self.s
229 i = off / self.s
228 off = i * self.s
230 off = i * self.s
229 n = data[off + ngshaoffset:off + ngshaoffset + 20]
231 n = data[off + ngshaoffset:off + ngshaoffset + 20]
230 if n == node:
232 if n == node:
231 self.map[n] = i + start / self.s
233 self.map[n] = i + start / self.s
232 return node
234 return node
233 else:
235 else:
234 break
236 break
235 end -= blocksize
237 end -= blocksize
236 return None
238 return None
237
239
238 def loadindex(self, i=None, end=None):
240 def loadindex(self, i=None, end=None):
239 if self.all:
241 if self.all:
240 return
242 return
241 all = False
243 all = False
242 if i is None:
244 if i is None:
243 blockstart = 0
245 blockstart = 0
244 blocksize = (65536 / self.s) * self.s
246 blocksize = (65536 / self.s) * self.s
245 end = self.datasize
247 end = self.datasize
246 all = True
248 all = True
247 else:
249 else:
248 if end:
250 if end:
249 blockstart = i * self.s
251 blockstart = i * self.s
250 end = end * self.s
252 end = end * self.s
251 blocksize = end - blockstart
253 blocksize = end - blockstart
252 else:
254 else:
253 blockstart = (i & ~1023) * self.s
255 blockstart = (i & ~1023) * self.s
254 blocksize = self.s * 1024
256 blocksize = self.s * 1024
255 end = blockstart + blocksize
257 end = blockstart + blocksize
256 while blockstart < end:
258 while blockstart < end:
257 self.loadblock(blockstart, blocksize)
259 self.loadblock(blockstart, blocksize)
258 blockstart += blocksize
260 blockstart += blocksize
259 if all:
261 if all:
260 self.all = True
262 self.all = True
261
263
262 class lazyindex(object):
264 class lazyindex(object):
263 """a lazy version of the index array"""
265 """a lazy version of the index array"""
264 def __init__(self, parser):
266 def __init__(self, parser):
265 self.p = parser
267 self.p = parser
266 def __len__(self):
268 def __len__(self):
267 return len(self.p.index)
269 return len(self.p.index)
268 def load(self, pos):
270 def load(self, pos):
269 if pos < 0:
271 if pos < 0:
270 pos += len(self.p.index)
272 pos += len(self.p.index)
271 self.p.loadindex(pos)
273 self.p.loadindex(pos)
272 return self.p.index[pos]
274 return self.p.index[pos]
273 def __getitem__(self, pos):
275 def __getitem__(self, pos):
274 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
276 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
275 def __setitem__(self, pos, item):
277 def __setitem__(self, pos, item):
276 self.p.index[pos] = _pack(indexformatng, *item)
278 self.p.index[pos] = _pack(indexformatng, *item)
277 def __delitem__(self, pos):
279 def __delitem__(self, pos):
278 del self.p.index[pos]
280 del self.p.index[pos]
279 def insert(self, pos, e):
281 def insert(self, pos, e):
280 self.p.index.insert(pos, _pack(indexformatng, *e))
282 self.p.index.insert(pos, _pack(indexformatng, *e))
281 def append(self, e):
283 def append(self, e):
282 self.p.index.append(_pack(indexformatng, *e))
284 self.p.index.append(_pack(indexformatng, *e))
283
285
284 class lazymap(object):
286 class lazymap(object):
285 """a lazy version of the node map"""
287 """a lazy version of the node map"""
286 def __init__(self, parser):
288 def __init__(self, parser):
287 self.p = parser
289 self.p = parser
288 def load(self, key):
290 def load(self, key):
289 n = self.p.findnode(key)
291 n = self.p.findnode(key)
290 if n is None:
292 if n is None:
291 raise KeyError(key)
293 raise KeyError(key)
292 def __contains__(self, key):
294 def __contains__(self, key):
293 if key in self.p.map:
295 if key in self.p.map:
294 return True
296 return True
295 self.p.loadmap()
297 self.p.loadmap()
296 return key in self.p.map
298 return key in self.p.map
297 def __iter__(self):
299 def __iter__(self):
298 yield nullid
300 yield nullid
299 for i, ret in enumerate(self.p.index):
301 for i, ret in enumerate(self.p.index):
300 if not ret:
302 if not ret:
301 self.p.loadindex(i)
303 self.p.loadindex(i)
302 ret = self.p.index[i]
304 ret = self.p.index[i]
303 if isinstance(ret, str):
305 if isinstance(ret, str):
304 ret = _unpack(indexformatng, ret)
306 ret = _unpack(indexformatng, ret)
305 yield ret[7]
307 yield ret[7]
306 def __getitem__(self, key):
308 def __getitem__(self, key):
307 try:
309 try:
308 return self.p.map[key]
310 return self.p.map[key]
309 except KeyError:
311 except KeyError:
310 try:
312 try:
311 self.load(key)
313 self.load(key)
312 return self.p.map[key]
314 return self.p.map[key]
313 except KeyError:
315 except KeyError:
314 raise KeyError("node " + hex(key))
316 raise KeyError("node " + hex(key))
315 def __setitem__(self, key, val):
317 def __setitem__(self, key, val):
316 self.p.map[key] = val
318 self.p.map[key] = val
317 def __delitem__(self, key):
319 def __delitem__(self, key):
318 del self.p.map[key]
320 del self.p.map[key]
319
321
320 indexformatv0 = ">4l20s20s20s"
322 indexformatv0 = ">4l20s20s20s"
321 v0shaoffset = 56
323 v0shaoffset = 56
322
324
323 class revlogoldio(object):
325 class revlogoldio(object):
324 def __init__(self):
326 def __init__(self):
325 self.size = struct.calcsize(indexformatv0)
327 self.size = struct.calcsize(indexformatv0)
326
328
327 def parseindex(self, fp, data, inline):
329 def parseindex(self, fp, data, inline):
328 s = self.size
330 s = self.size
329 index = []
331 index = []
330 nodemap = {nullid: nullrev}
332 nodemap = {nullid: nullrev}
331 n = off = 0
333 n = off = 0
332 if len(data) == _prereadsize:
334 if len(data) == _prereadsize:
333 data += fp.read() # read the rest
335 data += fp.read() # read the rest
334 l = len(data)
336 l = len(data)
335 while off + s <= l:
337 while off + s <= l:
336 cur = data[off:off + s]
338 cur = data[off:off + s]
337 off += s
339 off += s
338 e = _unpack(indexformatv0, cur)
340 e = _unpack(indexformatv0, cur)
339 # transform to revlogv1 format
341 # transform to revlogv1 format
340 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
342 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
341 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
343 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
342 index.append(e2)
344 index.append(e2)
343 nodemap[e[6]] = n
345 nodemap[e[6]] = n
344 n += 1
346 n += 1
345
347
346 return index, nodemap, None
348 return index, nodemap, None
347
349
348 def packentry(self, entry, node, version, rev):
350 def packentry(self, entry, node, version, rev):
349 if gettype(entry[0]):
351 if gettype(entry[0]):
350 raise RevlogError(_("index entry flags need RevlogNG"))
352 raise RevlogError(_("index entry flags need RevlogNG"))
351 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
353 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
352 node(entry[5]), node(entry[6]), entry[7])
354 node(entry[5]), node(entry[6]), entry[7])
353 return _pack(indexformatv0, *e2)
355 return _pack(indexformatv0, *e2)
354
356
355 # index ng:
357 # index ng:
356 # 6 bytes: offset
358 # 6 bytes: offset
357 # 2 bytes: flags
359 # 2 bytes: flags
358 # 4 bytes: compressed length
360 # 4 bytes: compressed length
359 # 4 bytes: uncompressed length
361 # 4 bytes: uncompressed length
360 # 4 bytes: base rev
362 # 4 bytes: base rev
361 # 4 bytes: link rev
363 # 4 bytes: link rev
362 # 4 bytes: parent 1 rev
364 # 4 bytes: parent 1 rev
363 # 4 bytes: parent 2 rev
365 # 4 bytes: parent 2 rev
364 # 32 bytes: nodeid
366 # 32 bytes: nodeid
365 indexformatng = ">Qiiiiii20s12x"
367 indexformatng = ">Qiiiiii20s12x"
366 ngshaoffset = 32
368 ngshaoffset = 32
367 versionformat = ">I"
369 versionformat = ">I"
368
370
369 class revlogio(object):
371 class revlogio(object):
370 def __init__(self):
372 def __init__(self):
371 self.size = struct.calcsize(indexformatng)
373 self.size = struct.calcsize(indexformatng)
372
374
373 def parseindex(self, fp, data, inline):
375 def parseindex(self, fp, data, inline):
374 if len(data) == _prereadsize:
376 if len(data) == _prereadsize:
375 if util.openhardlinks() and not inline:
377 if util.openhardlinks() and not inline:
376 # big index, let's parse it on demand
378 # big index, let's parse it on demand
377 parser = lazyparser(fp)
379 parser = lazyparser(fp)
378 index = lazyindex(parser)
380 index = lazyindex(parser)
379 nodemap = lazymap(parser)
381 nodemap = lazymap(parser)
380 e = list(index[0])
382 e = list(index[0])
381 type = gettype(e[0])
383 type = gettype(e[0])
382 e[0] = offset_type(0, type)
384 e[0] = offset_type(0, type)
383 index[0] = e
385 index[0] = e
384 return index, nodemap, None
386 return index, nodemap, None
385 else:
387 else:
386 data += fp.read()
388 data += fp.read()
387
389
388 # call the C implementation to parse the index data
390 # call the C implementation to parse the index data
389 index, nodemap, cache = parsers.parse_index(data, inline)
391 index, nodemap, cache = parsers.parse_index(data, inline)
390 return index, nodemap, cache
392 return index, nodemap, cache
391
393
392 def packentry(self, entry, node, version, rev):
394 def packentry(self, entry, node, version, rev):
393 p = _pack(indexformatng, *entry)
395 p = _pack(indexformatng, *entry)
394 if rev == 0:
396 if rev == 0:
395 p = _pack(versionformat, version) + p[4:]
397 p = _pack(versionformat, version) + p[4:]
396 return p
398 return p
397
399
398 class revlog(object):
400 class revlog(object):
399 """
401 """
400 the underlying revision storage object
402 the underlying revision storage object
401
403
402 A revlog consists of two parts, an index and the revision data.
404 A revlog consists of two parts, an index and the revision data.
403
405
404 The index is a file with a fixed record size containing
406 The index is a file with a fixed record size containing
405 information on each revision, including its nodeid (hash), the
407 information on each revision, including its nodeid (hash), the
406 nodeids of its parents, the position and offset of its data within
408 nodeids of its parents, the position and offset of its data within
407 the data file, and the revision it's based on. Finally, each entry
409 the data file, and the revision it's based on. Finally, each entry
408 contains a linkrev entry that can serve as a pointer to external
410 contains a linkrev entry that can serve as a pointer to external
409 data.
411 data.
410
412
411 The revision data itself is a linear collection of data chunks.
413 The revision data itself is a linear collection of data chunks.
412 Each chunk represents a revision and is usually represented as a
414 Each chunk represents a revision and is usually represented as a
413 delta against the previous chunk. To bound lookup time, runs of
415 delta against the previous chunk. To bound lookup time, runs of
414 deltas are limited to about 2 times the length of the original
416 deltas are limited to about 2 times the length of the original
415 version data. This makes retrieval of a version proportional to
417 version data. This makes retrieval of a version proportional to
416 its size, or O(1) relative to the number of revisions.
418 its size, or O(1) relative to the number of revisions.
417
419
418 Both pieces of the revlog are written to in an append-only
420 Both pieces of the revlog are written to in an append-only
419 fashion, which means we never need to rewrite a file to insert or
421 fashion, which means we never need to rewrite a file to insert or
420 remove data, and can use some simple techniques to avoid the need
422 remove data, and can use some simple techniques to avoid the need
421 for locking while reading.
423 for locking while reading.
422 """
424 """
423 def __init__(self, opener, indexfile):
425 def __init__(self, opener, indexfile):
424 """
426 """
425 create a revlog object
427 create a revlog object
426
428
427 opener is a function that abstracts the file opening operation
429 opener is a function that abstracts the file opening operation
428 and can be used to implement COW semantics or the like.
430 and can be used to implement COW semantics or the like.
429 """
431 """
430 self.indexfile = indexfile
432 self.indexfile = indexfile
431 self.datafile = indexfile[:-2] + ".d"
433 self.datafile = indexfile[:-2] + ".d"
432 self.opener = opener
434 self.opener = opener
433 self._cache = None
435 self._cache = None
434 self._chunkcache = (0, '')
436 self._chunkcache = (0, '')
435 self.nodemap = {nullid: nullrev}
437 self.nodemap = {nullid: nullrev}
436 self.index = []
438 self.index = []
437
439
438 v = REVLOG_DEFAULT_VERSION
440 v = REVLOG_DEFAULT_VERSION
439 if hasattr(opener, 'options') and 'defversion' in opener.options:
441 if hasattr(opener, 'options') and 'defversion' in opener.options:
440 v = opener.options['defversion']
442 v = opener.options['defversion']
441 if v & REVLOGNG:
443 if v & REVLOGNG:
442 v |= REVLOGNGINLINEDATA
444 v |= REVLOGNGINLINEDATA
443
445
444 i = ''
446 i = ''
445 try:
447 try:
446 f = self.opener(self.indexfile)
448 f = self.opener(self.indexfile)
447 if "nonlazy" in getattr(self.opener, 'options', {}):
449 if "nonlazy" in getattr(self.opener, 'options', {}):
448 i = f.read()
450 i = f.read()
449 else:
451 else:
450 i = f.read(_prereadsize)
452 i = f.read(_prereadsize)
451 if len(i) > 0:
453 if len(i) > 0:
452 v = struct.unpack(versionformat, i[:4])[0]
454 v = struct.unpack(versionformat, i[:4])[0]
453 except IOError, inst:
455 except IOError, inst:
454 if inst.errno != errno.ENOENT:
456 if inst.errno != errno.ENOENT:
455 raise
457 raise
456
458
457 self.version = v
459 self.version = v
458 self._inline = v & REVLOGNGINLINEDATA
460 self._inline = v & REVLOGNGINLINEDATA
459 flags = v & ~0xFFFF
461 flags = v & ~0xFFFF
460 fmt = v & 0xFFFF
462 fmt = v & 0xFFFF
461 if fmt == REVLOGV0 and flags:
463 if fmt == REVLOGV0 and flags:
462 raise RevlogError(_("index %s unknown flags %#04x for format v0")
464 raise RevlogError(_("index %s unknown flags %#04x for format v0")
463 % (self.indexfile, flags >> 16))
465 % (self.indexfile, flags >> 16))
464 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
466 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
465 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
467 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
466 % (self.indexfile, flags >> 16))
468 % (self.indexfile, flags >> 16))
467 elif fmt > REVLOGNG:
469 elif fmt > REVLOGNG:
468 raise RevlogError(_("index %s unknown format %d")
470 raise RevlogError(_("index %s unknown format %d")
469 % (self.indexfile, fmt))
471 % (self.indexfile, fmt))
470
472
471 self._io = revlogio()
473 self._io = revlogio()
472 if self.version == REVLOGV0:
474 if self.version == REVLOGV0:
473 self._io = revlogoldio()
475 self._io = revlogoldio()
474 if i:
476 if i:
475 try:
477 try:
476 d = self._io.parseindex(f, i, self._inline)
478 d = self._io.parseindex(f, i, self._inline)
477 except (ValueError, IndexError):
479 except (ValueError, IndexError):
478 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
480 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
479 self.index, self.nodemap, self._chunkcache = d
481 self.index, self.nodemap, self._chunkcache = d
480 if not self._chunkcache:
482 if not self._chunkcache:
481 self._chunkclear()
483 self._chunkclear()
482
484
483 # add the magic null revision at -1 (if it hasn't been done already)
485 # add the magic null revision at -1 (if it hasn't been done already)
484 if (self.index == [] or isinstance(self.index, lazyindex) or
486 if (self.index == [] or isinstance(self.index, lazyindex) or
485 self.index[-1][7] != nullid) :
487 self.index[-1][7] != nullid) :
486 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
488 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
487
489
488 def _loadindex(self, start, end):
490 def _loadindex(self, start, end):
489 """load a block of indexes all at once from the lazy parser"""
491 """load a block of indexes all at once from the lazy parser"""
490 if isinstance(self.index, lazyindex):
492 if isinstance(self.index, lazyindex):
491 self.index.p.loadindex(start, end)
493 self.index.p.loadindex(start, end)
492
494
493 def _loadindexmap(self):
495 def _loadindexmap(self):
494 """loads both the map and the index from the lazy parser"""
496 """loads both the map and the index from the lazy parser"""
495 if isinstance(self.index, lazyindex):
497 if isinstance(self.index, lazyindex):
496 p = self.index.p
498 p = self.index.p
497 p.loadindex()
499 p.loadindex()
498 self.nodemap = p.map
500 self.nodemap = p.map
499
501
500 def _loadmap(self):
502 def _loadmap(self):
501 """loads the map from the lazy parser"""
503 """loads the map from the lazy parser"""
502 if isinstance(self.nodemap, lazymap):
504 if isinstance(self.nodemap, lazymap):
503 self.nodemap.p.loadmap()
505 self.nodemap.p.loadmap()
504 self.nodemap = self.nodemap.p.map
506 self.nodemap = self.nodemap.p.map
505
507
506 def tip(self):
508 def tip(self):
507 return self.node(len(self.index) - 2)
509 return self.node(len(self.index) - 2)
508 def __len__(self):
510 def __len__(self):
509 return len(self.index) - 1
511 return len(self.index) - 1
510 def __iter__(self):
512 def __iter__(self):
511 for i in xrange(len(self)):
513 for i in xrange(len(self)):
512 yield i
514 yield i
513 def rev(self, node):
515 def rev(self, node):
514 try:
516 try:
515 return self.nodemap[node]
517 return self.nodemap[node]
516 except KeyError:
518 except KeyError:
517 raise LookupError(node, self.indexfile, _('no node'))
519 raise LookupError(node, self.indexfile, _('no node'))
518 def node(self, rev):
520 def node(self, rev):
519 return self.index[rev][7]
521 return self.index[rev][7]
520 def linkrev(self, rev):
522 def linkrev(self, rev):
521 return self.index[rev][4]
523 return self.index[rev][4]
522 def parents(self, node):
524 def parents(self, node):
523 i = self.index
525 i = self.index
524 d = i[self.rev(node)]
526 d = i[self.rev(node)]
525 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
527 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
526 def parentrevs(self, rev):
528 def parentrevs(self, rev):
527 return self.index[rev][5:7]
529 return self.index[rev][5:7]
528 def start(self, rev):
530 def start(self, rev):
529 return int(self.index[rev][0] >> 16)
531 return int(self.index[rev][0] >> 16)
530 def end(self, rev):
532 def end(self, rev):
531 return self.start(rev) + self.length(rev)
533 return self.start(rev) + self.length(rev)
532 def length(self, rev):
534 def length(self, rev):
533 return self.index[rev][1]
535 return self.index[rev][1]
534 def base(self, rev):
536 def base(self, rev):
535 return self.index[rev][3]
537 return self.index[rev][3]
536 def flags(self, rev):
538 def flags(self, rev):
537 return self.index[rev][0] & 0xFFFF
539 return self.index[rev][0] & 0xFFFF
538
540
539 def size(self, rev):
541 def size(self, rev):
540 """return the length of the uncompressed text for a given revision"""
542 """return the length of the uncompressed text for a given revision"""
541 l = self.index[rev][2]
543 l = self.index[rev][2]
542 if l >= 0:
544 if l >= 0:
543 return l
545 return l
544
546
545 t = self.revision(self.node(rev))
547 t = self.revision(self.node(rev))
546 return len(t)
548 return len(t)
547
549
548 def reachable(self, node, stop=None):
550 def reachable(self, node, stop=None):
549 """return the set of all nodes ancestral to a given node, including
551 """return the set of all nodes ancestral to a given node, including
550 the node itself, stopping when stop is matched"""
552 the node itself, stopping when stop is matched"""
551 reachable = set((node,))
553 reachable = set((node,))
552 visit = [node]
554 visit = [node]
553 if stop:
555 if stop:
554 stopn = self.rev(stop)
556 stopn = self.rev(stop)
555 else:
557 else:
556 stopn = 0
558 stopn = 0
557 while visit:
559 while visit:
558 n = visit.pop(0)
560 n = visit.pop(0)
559 if n == stop:
561 if n == stop:
560 continue
562 continue
561 if n == nullid:
563 if n == nullid:
562 continue
564 continue
563 for p in self.parents(n):
565 for p in self.parents(n):
564 if self.rev(p) < stopn:
566 if self.rev(p) < stopn:
565 continue
567 continue
566 if p not in reachable:
568 if p not in reachable:
567 reachable.add(p)
569 reachable.add(p)
568 visit.append(p)
570 visit.append(p)
569 return reachable
571 return reachable
570
572
571 def ancestors(self, *revs):
573 def ancestors(self, *revs):
572 """Generate the ancestors of 'revs' in reverse topological order.
574 """Generate the ancestors of 'revs' in reverse topological order.
573
575
574 Yield a sequence of revision numbers starting with the parents
576 Yield a sequence of revision numbers starting with the parents
575 of each revision in revs, i.e., each revision is *not* considered
577 of each revision in revs, i.e., each revision is *not* considered
576 an ancestor of itself. Results are in breadth-first order:
578 an ancestor of itself. Results are in breadth-first order:
577 parents of each rev in revs, then parents of those, etc. Result
579 parents of each rev in revs, then parents of those, etc. Result
578 does not include the null revision."""
580 does not include the null revision."""
579 visit = list(revs)
581 visit = list(revs)
580 seen = set([nullrev])
582 seen = set([nullrev])
581 while visit:
583 while visit:
582 for parent in self.parentrevs(visit.pop(0)):
584 for parent in self.parentrevs(visit.pop(0)):
583 if parent not in seen:
585 if parent not in seen:
584 visit.append(parent)
586 visit.append(parent)
585 seen.add(parent)
587 seen.add(parent)
586 yield parent
588 yield parent
587
589
588 def descendants(self, *revs):
590 def descendants(self, *revs):
589 """Generate the descendants of 'revs' in revision order.
591 """Generate the descendants of 'revs' in revision order.
590
592
591 Yield a sequence of revision numbers starting with a child of
593 Yield a sequence of revision numbers starting with a child of
592 some rev in revs, i.e., each revision is *not* considered a
594 some rev in revs, i.e., each revision is *not* considered a
593 descendant of itself. Results are ordered by revision number (a
595 descendant of itself. Results are ordered by revision number (a
594 topological sort)."""
596 topological sort)."""
595 seen = set(revs)
597 seen = set(revs)
596 for i in xrange(min(revs) + 1, len(self)):
598 for i in xrange(min(revs) + 1, len(self)):
597 for x in self.parentrevs(i):
599 for x in self.parentrevs(i):
598 if x != nullrev and x in seen:
600 if x != nullrev and x in seen:
599 seen.add(i)
601 seen.add(i)
600 yield i
602 yield i
601 break
603 break
602
604
603 def findmissing(self, common=None, heads=None):
605 def findmissing(self, common=None, heads=None):
604 """Return the ancestors of heads that are not ancestors of common.
606 """Return the ancestors of heads that are not ancestors of common.
605
607
606 More specifically, return a list of nodes N such that every N
608 More specifically, return a list of nodes N such that every N
607 satisfies the following constraints:
609 satisfies the following constraints:
608
610
609 1. N is an ancestor of some node in 'heads'
611 1. N is an ancestor of some node in 'heads'
610 2. N is not an ancestor of any node in 'common'
612 2. N is not an ancestor of any node in 'common'
611
613
612 The list is sorted by revision number, meaning it is
614 The list is sorted by revision number, meaning it is
613 topologically sorted.
615 topologically sorted.
614
616
615 'heads' and 'common' are both lists of node IDs. If heads is
617 'heads' and 'common' are both lists of node IDs. If heads is
616 not supplied, uses all of the revlog's heads. If common is not
618 not supplied, uses all of the revlog's heads. If common is not
617 supplied, uses nullid."""
619 supplied, uses nullid."""
618 if common is None:
620 if common is None:
619 common = [nullid]
621 common = [nullid]
620 if heads is None:
622 if heads is None:
621 heads = self.heads()
623 heads = self.heads()
622
624
623 common = [self.rev(n) for n in common]
625 common = [self.rev(n) for n in common]
624 heads = [self.rev(n) for n in heads]
626 heads = [self.rev(n) for n in heads]
625
627
626 # we want the ancestors, but inclusive
628 # we want the ancestors, but inclusive
627 has = set(self.ancestors(*common))
629 has = set(self.ancestors(*common))
628 has.add(nullrev)
630 has.add(nullrev)
629 has.update(common)
631 has.update(common)
630
632
631 # take all ancestors from heads that aren't in has
633 # take all ancestors from heads that aren't in has
632 missing = set()
634 missing = set()
633 visit = [r for r in heads if r not in has]
635 visit = [r for r in heads if r not in has]
634 while visit:
636 while visit:
635 r = visit.pop(0)
637 r = visit.pop(0)
636 if r in missing:
638 if r in missing:
637 continue
639 continue
638 else:
640 else:
639 missing.add(r)
641 missing.add(r)
640 for p in self.parentrevs(r):
642 for p in self.parentrevs(r):
641 if p not in has:
643 if p not in has:
642 visit.append(p)
644 visit.append(p)
643 missing = list(missing)
645 missing = list(missing)
644 missing.sort()
646 missing.sort()
645 return [self.node(r) for r in missing]
647 return [self.node(r) for r in missing]
646
648
647 def nodesbetween(self, roots=None, heads=None):
649 def nodesbetween(self, roots=None, heads=None):
648 """Return a topological path from 'roots' to 'heads'.
650 """Return a topological path from 'roots' to 'heads'.
649
651
650 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
652 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
651 topologically sorted list of all nodes N that satisfy both of
653 topologically sorted list of all nodes N that satisfy both of
652 these constraints:
654 these constraints:
653
655
654 1. N is a descendant of some node in 'roots'
656 1. N is a descendant of some node in 'roots'
655 2. N is an ancestor of some node in 'heads'
657 2. N is an ancestor of some node in 'heads'
656
658
657 Every node is considered to be both a descendant and an ancestor
659 Every node is considered to be both a descendant and an ancestor
658 of itself, so every reachable node in 'roots' and 'heads' will be
660 of itself, so every reachable node in 'roots' and 'heads' will be
659 included in 'nodes'.
661 included in 'nodes'.
660
662
661 'outroots' is the list of reachable nodes in 'roots', i.e., the
663 'outroots' is the list of reachable nodes in 'roots', i.e., the
662 subset of 'roots' that is returned in 'nodes'. Likewise,
664 subset of 'roots' that is returned in 'nodes'. Likewise,
663 'outheads' is the subset of 'heads' that is also in 'nodes'.
665 'outheads' is the subset of 'heads' that is also in 'nodes'.
664
666
665 'roots' and 'heads' are both lists of node IDs. If 'roots' is
667 'roots' and 'heads' are both lists of node IDs. If 'roots' is
666 unspecified, uses nullid as the only root. If 'heads' is
668 unspecified, uses nullid as the only root. If 'heads' is
667 unspecified, uses list of all of the revlog's heads."""
669 unspecified, uses list of all of the revlog's heads."""
668 nonodes = ([], [], [])
670 nonodes = ([], [], [])
669 if roots is not None:
671 if roots is not None:
670 roots = list(roots)
672 roots = list(roots)
671 if not roots:
673 if not roots:
672 return nonodes
674 return nonodes
673 lowestrev = min([self.rev(n) for n in roots])
675 lowestrev = min([self.rev(n) for n in roots])
674 else:
676 else:
675 roots = [nullid] # Everybody's a descendent of nullid
677 roots = [nullid] # Everybody's a descendent of nullid
676 lowestrev = nullrev
678 lowestrev = nullrev
677 if (lowestrev == nullrev) and (heads is None):
679 if (lowestrev == nullrev) and (heads is None):
678 # We want _all_ the nodes!
680 # We want _all_ the nodes!
679 return ([self.node(r) for r in self], [nullid], list(self.heads()))
681 return ([self.node(r) for r in self], [nullid], list(self.heads()))
680 if heads is None:
682 if heads is None:
681 # All nodes are ancestors, so the latest ancestor is the last
683 # All nodes are ancestors, so the latest ancestor is the last
682 # node.
684 # node.
683 highestrev = len(self) - 1
685 highestrev = len(self) - 1
684 # Set ancestors to None to signal that every node is an ancestor.
686 # Set ancestors to None to signal that every node is an ancestor.
685 ancestors = None
687 ancestors = None
686 # Set heads to an empty dictionary for later discovery of heads
688 # Set heads to an empty dictionary for later discovery of heads
687 heads = {}
689 heads = {}
688 else:
690 else:
689 heads = list(heads)
691 heads = list(heads)
690 if not heads:
692 if not heads:
691 return nonodes
693 return nonodes
692 ancestors = set()
694 ancestors = set()
693 # Turn heads into a dictionary so we can remove 'fake' heads.
695 # Turn heads into a dictionary so we can remove 'fake' heads.
694 # Also, later we will be using it to filter out the heads we can't
696 # Also, later we will be using it to filter out the heads we can't
695 # find from roots.
697 # find from roots.
696 heads = dict.fromkeys(heads, 0)
698 heads = dict.fromkeys(heads, 0)
697 # Start at the top and keep marking parents until we're done.
699 # Start at the top and keep marking parents until we're done.
698 nodestotag = set(heads)
700 nodestotag = set(heads)
699 # Remember where the top was so we can use it as a limit later.
701 # Remember where the top was so we can use it as a limit later.
700 highestrev = max([self.rev(n) for n in nodestotag])
702 highestrev = max([self.rev(n) for n in nodestotag])
701 while nodestotag:
703 while nodestotag:
702 # grab a node to tag
704 # grab a node to tag
703 n = nodestotag.pop()
705 n = nodestotag.pop()
704 # Never tag nullid
706 # Never tag nullid
705 if n == nullid:
707 if n == nullid:
706 continue
708 continue
707 # A node's revision number represents its place in a
709 # A node's revision number represents its place in a
708 # topologically sorted list of nodes.
710 # topologically sorted list of nodes.
709 r = self.rev(n)
711 r = self.rev(n)
710 if r >= lowestrev:
712 if r >= lowestrev:
711 if n not in ancestors:
713 if n not in ancestors:
712 # If we are possibly a descendent of one of the roots
714 # If we are possibly a descendent of one of the roots
713 # and we haven't already been marked as an ancestor
715 # and we haven't already been marked as an ancestor
714 ancestors.add(n) # Mark as ancestor
716 ancestors.add(n) # Mark as ancestor
715 # Add non-nullid parents to list of nodes to tag.
717 # Add non-nullid parents to list of nodes to tag.
716 nodestotag.update([p for p in self.parents(n) if
718 nodestotag.update([p for p in self.parents(n) if
717 p != nullid])
719 p != nullid])
718 elif n in heads: # We've seen it before, is it a fake head?
720 elif n in heads: # We've seen it before, is it a fake head?
719 # So it is, real heads should not be the ancestors of
721 # So it is, real heads should not be the ancestors of
720 # any other heads.
722 # any other heads.
721 heads.pop(n)
723 heads.pop(n)
722 if not ancestors:
724 if not ancestors:
723 return nonodes
725 return nonodes
724 # Now that we have our set of ancestors, we want to remove any
726 # Now that we have our set of ancestors, we want to remove any
725 # roots that are not ancestors.
727 # roots that are not ancestors.
726
728
727 # If one of the roots was nullid, everything is included anyway.
729 # If one of the roots was nullid, everything is included anyway.
728 if lowestrev > nullrev:
730 if lowestrev > nullrev:
729 # But, since we weren't, let's recompute the lowest rev to not
731 # But, since we weren't, let's recompute the lowest rev to not
730 # include roots that aren't ancestors.
732 # include roots that aren't ancestors.
731
733
732 # Filter out roots that aren't ancestors of heads
734 # Filter out roots that aren't ancestors of heads
733 roots = [n for n in roots if n in ancestors]
735 roots = [n for n in roots if n in ancestors]
734 # Recompute the lowest revision
736 # Recompute the lowest revision
735 if roots:
737 if roots:
736 lowestrev = min([self.rev(n) for n in roots])
738 lowestrev = min([self.rev(n) for n in roots])
737 else:
739 else:
738 # No more roots? Return empty list
740 # No more roots? Return empty list
739 return nonodes
741 return nonodes
740 else:
742 else:
741 # We are descending from nullid, and don't need to care about
743 # We are descending from nullid, and don't need to care about
742 # any other roots.
744 # any other roots.
743 lowestrev = nullrev
745 lowestrev = nullrev
744 roots = [nullid]
746 roots = [nullid]
745 # Transform our roots list into a set.
747 # Transform our roots list into a set.
746 descendents = set(roots)
748 descendents = set(roots)
747 # Also, keep the original roots so we can filter out roots that aren't
749 # Also, keep the original roots so we can filter out roots that aren't
748 # 'real' roots (i.e. are descended from other roots).
750 # 'real' roots (i.e. are descended from other roots).
749 roots = descendents.copy()
751 roots = descendents.copy()
750 # Our topologically sorted list of output nodes.
752 # Our topologically sorted list of output nodes.
751 orderedout = []
753 orderedout = []
752 # Don't start at nullid since we don't want nullid in our output list,
754 # Don't start at nullid since we don't want nullid in our output list,
753 # and if nullid shows up in descedents, empty parents will look like
755 # and if nullid shows up in descedents, empty parents will look like
754 # they're descendents.
756 # they're descendents.
755 for r in xrange(max(lowestrev, 0), highestrev + 1):
757 for r in xrange(max(lowestrev, 0), highestrev + 1):
756 n = self.node(r)
758 n = self.node(r)
757 isdescendent = False
759 isdescendent = False
758 if lowestrev == nullrev: # Everybody is a descendent of nullid
760 if lowestrev == nullrev: # Everybody is a descendent of nullid
759 isdescendent = True
761 isdescendent = True
760 elif n in descendents:
762 elif n in descendents:
761 # n is already a descendent
763 # n is already a descendent
762 isdescendent = True
764 isdescendent = True
763 # This check only needs to be done here because all the roots
765 # This check only needs to be done here because all the roots
764 # will start being marked is descendents before the loop.
766 # will start being marked is descendents before the loop.
765 if n in roots:
767 if n in roots:
766 # If n was a root, check if it's a 'real' root.
768 # If n was a root, check if it's a 'real' root.
767 p = tuple(self.parents(n))
769 p = tuple(self.parents(n))
768 # If any of its parents are descendents, it's not a root.
770 # If any of its parents are descendents, it's not a root.
769 if (p[0] in descendents) or (p[1] in descendents):
771 if (p[0] in descendents) or (p[1] in descendents):
770 roots.remove(n)
772 roots.remove(n)
771 else:
773 else:
772 p = tuple(self.parents(n))
774 p = tuple(self.parents(n))
773 # A node is a descendent if either of its parents are
775 # A node is a descendent if either of its parents are
774 # descendents. (We seeded the dependents list with the roots
776 # descendents. (We seeded the dependents list with the roots
775 # up there, remember?)
777 # up there, remember?)
776 if (p[0] in descendents) or (p[1] in descendents):
778 if (p[0] in descendents) or (p[1] in descendents):
777 descendents.add(n)
779 descendents.add(n)
778 isdescendent = True
780 isdescendent = True
779 if isdescendent and ((ancestors is None) or (n in ancestors)):
781 if isdescendent and ((ancestors is None) or (n in ancestors)):
780 # Only include nodes that are both descendents and ancestors.
782 # Only include nodes that are both descendents and ancestors.
781 orderedout.append(n)
783 orderedout.append(n)
782 if (ancestors is not None) and (n in heads):
784 if (ancestors is not None) and (n in heads):
783 # We're trying to figure out which heads are reachable
785 # We're trying to figure out which heads are reachable
784 # from roots.
786 # from roots.
785 # Mark this head as having been reached
787 # Mark this head as having been reached
786 heads[n] = 1
788 heads[n] = 1
787 elif ancestors is None:
789 elif ancestors is None:
788 # Otherwise, we're trying to discover the heads.
790 # Otherwise, we're trying to discover the heads.
789 # Assume this is a head because if it isn't, the next step
791 # Assume this is a head because if it isn't, the next step
790 # will eventually remove it.
792 # will eventually remove it.
791 heads[n] = 1
793 heads[n] = 1
792 # But, obviously its parents aren't.
794 # But, obviously its parents aren't.
793 for p in self.parents(n):
795 for p in self.parents(n):
794 heads.pop(p, None)
796 heads.pop(p, None)
795 heads = [n for n in heads.iterkeys() if heads[n] != 0]
797 heads = [n for n in heads.iterkeys() if heads[n] != 0]
796 roots = list(roots)
798 roots = list(roots)
797 assert orderedout
799 assert orderedout
798 assert roots
800 assert roots
799 assert heads
801 assert heads
800 return (orderedout, roots, heads)
802 return (orderedout, roots, heads)
801
803
802 def heads(self, start=None, stop=None):
804 def heads(self, start=None, stop=None):
803 """return the list of all nodes that have no children
805 """return the list of all nodes that have no children
804
806
805 if start is specified, only heads that are descendants of
807 if start is specified, only heads that are descendants of
806 start will be returned
808 start will be returned
807 if stop is specified, it will consider all the revs from stop
809 if stop is specified, it will consider all the revs from stop
808 as if they had no children
810 as if they had no children
809 """
811 """
810 if start is None and stop is None:
812 if start is None and stop is None:
811 count = len(self)
813 count = len(self)
812 if not count:
814 if not count:
813 return [nullid]
815 return [nullid]
814 ishead = [1] * (count + 1)
816 ishead = [1] * (count + 1)
815 index = self.index
817 index = self.index
816 for r in xrange(count):
818 for r in xrange(count):
817 e = index[r]
819 e = index[r]
818 ishead[e[5]] = ishead[e[6]] = 0
820 ishead[e[5]] = ishead[e[6]] = 0
819 return [self.node(r) for r in xrange(count) if ishead[r]]
821 return [self.node(r) for r in xrange(count) if ishead[r]]
820
822
821 if start is None:
823 if start is None:
822 start = nullid
824 start = nullid
823 if stop is None:
825 if stop is None:
824 stop = []
826 stop = []
825 stoprevs = set([self.rev(n) for n in stop])
827 stoprevs = set([self.rev(n) for n in stop])
826 startrev = self.rev(start)
828 startrev = self.rev(start)
827 reachable = set((startrev,))
829 reachable = set((startrev,))
828 heads = set((startrev,))
830 heads = set((startrev,))
829
831
830 parentrevs = self.parentrevs
832 parentrevs = self.parentrevs
831 for r in xrange(startrev + 1, len(self)):
833 for r in xrange(startrev + 1, len(self)):
832 for p in parentrevs(r):
834 for p in parentrevs(r):
833 if p in reachable:
835 if p in reachable:
834 if r not in stoprevs:
836 if r not in stoprevs:
835 reachable.add(r)
837 reachable.add(r)
836 heads.add(r)
838 heads.add(r)
837 if p in heads and p not in stoprevs:
839 if p in heads and p not in stoprevs:
838 heads.remove(p)
840 heads.remove(p)
839
841
840 return [self.node(r) for r in heads]
842 return [self.node(r) for r in heads]
841
843
842 def children(self, node):
844 def children(self, node):
843 """find the children of a given node"""
845 """find the children of a given node"""
844 c = []
846 c = []
845 p = self.rev(node)
847 p = self.rev(node)
846 for r in range(p + 1, len(self)):
848 for r in range(p + 1, len(self)):
847 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
849 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
848 if prevs:
850 if prevs:
849 for pr in prevs:
851 for pr in prevs:
850 if pr == p:
852 if pr == p:
851 c.append(self.node(r))
853 c.append(self.node(r))
852 elif p == nullrev:
854 elif p == nullrev:
853 c.append(self.node(r))
855 c.append(self.node(r))
854 return c
856 return c
855
857
856 def descendant(self, start, end):
858 def descendant(self, start, end):
857 for i in self.descendants(start):
859 for i in self.descendants(start):
858 if i == end:
860 if i == end:
859 return True
861 return True
860 elif i > end:
862 elif i > end:
861 break
863 break
862 return False
864 return False
863
865
864 def ancestor(self, a, b):
866 def ancestor(self, a, b):
865 """calculate the least common ancestor of nodes a and b"""
867 """calculate the least common ancestor of nodes a and b"""
866
868
867 # fast path, check if it is a descendant
869 # fast path, check if it is a descendant
868 a, b = self.rev(a), self.rev(b)
870 a, b = self.rev(a), self.rev(b)
869 start, end = sorted((a, b))
871 start, end = sorted((a, b))
870 if self.descendant(start, end):
872 if self.descendant(start, end):
871 return self.node(start)
873 return self.node(start)
872
874
873 def parents(rev):
875 def parents(rev):
874 return [p for p in self.parentrevs(rev) if p != nullrev]
876 return [p for p in self.parentrevs(rev) if p != nullrev]
875
877
876 c = ancestor.ancestor(a, b, parents)
878 c = ancestor.ancestor(a, b, parents)
877 if c is None:
879 if c is None:
878 return nullid
880 return nullid
879
881
880 return self.node(c)
882 return self.node(c)
881
883
882 def _match(self, id):
884 def _match(self, id):
883 if isinstance(id, (long, int)):
885 if isinstance(id, (long, int)):
884 # rev
886 # rev
885 return self.node(id)
887 return self.node(id)
886 if len(id) == 20:
888 if len(id) == 20:
887 # possibly a binary node
889 # possibly a binary node
888 # odds of a binary node being all hex in ASCII are 1 in 10**25
890 # odds of a binary node being all hex in ASCII are 1 in 10**25
889 try:
891 try:
890 node = id
892 node = id
891 self.rev(node) # quick search the index
893 self.rev(node) # quick search the index
892 return node
894 return node
893 except LookupError:
895 except LookupError:
894 pass # may be partial hex id
896 pass # may be partial hex id
895 try:
897 try:
896 # str(rev)
898 # str(rev)
897 rev = int(id)
899 rev = int(id)
898 if str(rev) != id:
900 if str(rev) != id:
899 raise ValueError
901 raise ValueError
900 if rev < 0:
902 if rev < 0:
901 rev = len(self) + rev
903 rev = len(self) + rev
902 if rev < 0 or rev >= len(self):
904 if rev < 0 or rev >= len(self):
903 raise ValueError
905 raise ValueError
904 return self.node(rev)
906 return self.node(rev)
905 except (ValueError, OverflowError):
907 except (ValueError, OverflowError):
906 pass
908 pass
907 if len(id) == 40:
909 if len(id) == 40:
908 try:
910 try:
909 # a full hex nodeid?
911 # a full hex nodeid?
910 node = bin(id)
912 node = bin(id)
911 self.rev(node)
913 self.rev(node)
912 return node
914 return node
913 except (TypeError, LookupError):
915 except (TypeError, LookupError):
914 pass
916 pass
915
917
916 def _partialmatch(self, id):
918 def _partialmatch(self, id):
917 if len(id) < 40:
919 if len(id) < 40:
918 try:
920 try:
919 # hex(node)[:...]
921 # hex(node)[:...]
920 l = len(id) // 2 # grab an even number of digits
922 l = len(id) // 2 # grab an even number of digits
921 bin_id = bin(id[:l * 2])
923 bin_id = bin(id[:l * 2])
922 nl = [n for n in self.nodemap if n[:l] == bin_id]
924 nl = [n for n in self.nodemap if n[:l] == bin_id]
923 nl = [n for n in nl if hex(n).startswith(id)]
925 nl = [n for n in nl if hex(n).startswith(id)]
924 if len(nl) > 0:
926 if len(nl) > 0:
925 if len(nl) == 1:
927 if len(nl) == 1:
926 return nl[0]
928 return nl[0]
927 raise LookupError(id, self.indexfile,
929 raise LookupError(id, self.indexfile,
928 _('ambiguous identifier'))
930 _('ambiguous identifier'))
929 return None
931 return None
930 except TypeError:
932 except TypeError:
931 pass
933 pass
932
934
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

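    # Illustrative usage of lookup() (not part of the original file; `r`
    # is an assumed revlog instance and the hex prefix is hypothetical):
    #
    #   >>> r.lookup(0)              # revision number -> binary nodeid
    #   >>> r.lookup('0')            # str(rev) resolves the same way
    #   >>> r.lookup('-1')           # negative revs count back from tip
    #   >>> r.lookup('1e4e1b8f71e0') # unambiguous hex prefix
    #
    # Anything unresolvable raises LookupError('no match found').
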
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

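    # Note (added for clarity): cmp() never materializes the stored
    # revision. Because a nodeid is the hash of the text plus its parent
    # nodes, hashing the candidate text with this node's parents and
    # comparing against the node is equivalent to, and cheaper than,
    # text != self.revision(node).
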
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

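    # Worked example of the cache window above (hypothetical numbers):
    # with a cache (o, d) where o == 4096 and len(d) == 65536, a request
    # for offset == 8192, length == 100 gives cachestart == 4096 and
    # cacheend == 4196, both inside d, so d[4096:4196] is returned with
    # no file I/O. A request outside the window falls back to
    # _loadchunk(), which reads ahead and refreshes the cache.
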
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

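    # The inline adjustment above reflects the interleaved layout of an
    # inline revlog, where each index entry (self._io.size bytes) is
    # immediately followed by its revision data. Illustrative: for
    # startrev == 2, index entries 0..2 precede that revision's data, so
    # the data-only offset self.start(2) is shifted by 3 * self._io.size
    # to obtain the real file offset.
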
    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

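    # Fast path rationale (illustrative): when rev2 == rev1 + 1 and both
    # share a delta base, the chunk stored for rev2 is by construction a
    # delta against rev1, so it can be returned directly instead of
    # reconstructing both full texts and re-diffing them.
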
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if (node != hash(text, p1, p2) and
            not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

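    # Delta-chain sketch for revision() above (illustrative revs): with
    # rev == 7 and base(7) == 4, the method reads the full snapshot
    # stored at rev 4 and applies the deltas for revs 5..7 in order:
    #
    #   text = self._chunk(4)                  # full text
    #   bins = [self._chunk(r) for r in (5, 6, 7)]
    #   text = mdiff.patches(text, bins)
    #
    # The integrity check is skipped for REVIDX_PUNCHED_FLAG revisions,
    # whose stored data is a placeholder and so is not expected to hash
    # to the recorded nodeid.
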
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # or the base revision is punched
        if (not curr or dist > len(text) * 2 or
            (self.flags(base) & REVIDX_PUNCHED_FLAG)):
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

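    # Worked example of the snapshot heuristic in _addrevision above
    # (hypothetical sizes): with len(text) == 1000, a compressed delta of
    # l == 150 bytes and offset - self.start(base) == 1900, dist == 2050
    # exceeds len(text) * 2 == 2000, so a full compressed snapshot is
    # written and this revision becomes the new base. A punched base
    # forces the same choice, since a delta chain rooted in placeholder
    # data could not be reconstructed.
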
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. the first delta is
        parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            yield d

        yield changegroup.closechunk()

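    # Shape of each chunk yielded by group() above (as implied by the
    # code here, not a separate spec): a length header, then 80 bytes of
    # metadata, then the delta:
    #
    #   chunkheader | node (20) | p1 (20) | p2 (20) | cs (20) | delta
    #
    # addgroup() below unpacks exactly this header with
    # struct.unpack("20s20s20s20s", chunk[:80]).
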
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        #track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

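    # Truncation arithmetic in strip() above (illustrative, rev == 5 as
    # the first revision with linkrev >= minlink): a non-inline revlog
    # truncates the data file at self.start(5) and the index at
    # 5 * self._io.size; an inline revlog holds both in the index file,
    # which is truncated at self.start(5) + 5 * self._io.size.
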
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

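    # Reading checksize()'s result (illustrative): dd is the data file's
    # actual size minus the size implied by the last index entry, and di
    # is the equivalent excess for the index file; (0, 0) means both
    # files match the index, a positive value means trailing bytes, and
    # a negative one a truncated file.
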
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res