revlog: make compress a method...
Bryan O'Sullivan
r17128:1028a1c9 default
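
This change turns the module-level compress() function into a method on the
revlog class, so that subclasses can override how revision data is
compressed. A minimal sketch of the kind of override this enables, assuming
a hypothetical subclass (fastrevlog and its speed-for-ratio trade-off are
illustrative only, not part of this patch):

    import zlib
    from mercurial import revlog

    class fastrevlog(revlog.revlog):
        """Hypothetical revlog that trades compression ratio for speed."""
        def compress(self, text):
            # Honor the same return contract as revlog.compress():
            # ("", data) for zlib data or literal text starting with '\0',
            # ('u', data) for text stored uncompressed.
            if not text:
                return ("", text)
            bin = zlib.compress(text, 1)  # level 1: fastest
            if len(bin) >= len(text):
                # incompressible data: store it literally
                if text[0] == '\0':
                    return ("", text)
                return ('u', text)
            return ("", bin)

decompress() stays a module-level function and keys off the first byte of
each stored chunk ('x' for zlib streams, '\0' for literal data, 'u' for
uncompressed text), so an override only has to preserve that framing.
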
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,1312 +1,1312 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 # import stuff from node for others to import from revlog
 from node import bin, hex, nullid, nullrev
 from i18n import _
 import ancestor, mdiff, parsers, error, util, dagutil
 import struct, zlib, errno

 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = util.sha1

 # revlog header flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOGGENERALDELTA = (1 << 17)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

 # revlog index flags
 REVIDX_KNOWN_FLAGS = 0

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 RevlogError = error.RevlogError
 LookupError = error.LookupError

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     return long(long(offset) << 16 | type)

 nullhash = _sha(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent nodes is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         l = [p1, p2]
         l.sort()
         s = _sha(l[0])
         s.update(l[1])
     s.update(text)
     return s.digest()
-def compress(text):
-    """ generate a possibly-compressed representation of text """
-    if not text:
-        return ("", text)
-    l = len(text)
-    bin = None
-    if l < 44:
-        pass
-    elif l > 1000000:
-        # zlib makes an internal copy, thus doubling memory usage for
-        # large files, so lets do this in pieces
-        z = zlib.compressobj()
-        p = []
-        pos = 0
-        while pos < l:
-            pos2 = pos + 2**20
-            p.append(z.compress(text[pos:pos2]))
-            pos = pos2
-        p.append(z.flush())
-        if sum(map(len, p)) < l:
-            bin = "".join(p)
-    else:
-        bin = _compress(text)
-    if bin is None or len(bin) > l:
-        if text[0] == '\0':
-            return ("", text)
-        return ('u', text)
-    return ("", bin)
-
107 def decompress(bin):
78 def decompress(bin):
108 """ decompress the given input """
79 """ decompress the given input """
109 if not bin:
80 if not bin:
110 return bin
81 return bin
111 t = bin[0]
82 t = bin[0]
112 if t == '\0':
83 if t == '\0':
113 return bin
84 return bin
114 if t == 'x':
85 if t == 'x':
115 try:
86 try:
116 return _decompress(bin)
87 return _decompress(bin)
117 except zlib.error, e:
88 except zlib.error, e:
118 raise RevlogError(_("revlog decompress error: %s") % str(e))
89 raise RevlogError(_("revlog decompress error: %s") % str(e))
119 if t == 'u':
90 if t == 'u':
120 return bin[1:]
91 return bin[1:]
121 raise RevlogError(_("unknown compression type %r") % t)
92 raise RevlogError(_("unknown compression type %r") % t)
122
93
123 indexformatv0 = ">4l20s20s20s"
94 indexformatv0 = ">4l20s20s20s"
124 v0shaoffset = 56
95 v0shaoffset = 56
125
96
126 class revlogoldio(object):
97 class revlogoldio(object):
127 def __init__(self):
98 def __init__(self):
128 self.size = struct.calcsize(indexformatv0)
99 self.size = struct.calcsize(indexformatv0)
129
100
130 def parseindex(self, data, inline):
101 def parseindex(self, data, inline):
131 s = self.size
102 s = self.size
132 index = []
103 index = []
133 nodemap = {nullid: nullrev}
104 nodemap = {nullid: nullrev}
134 n = off = 0
105 n = off = 0
135 l = len(data)
106 l = len(data)
136 while off + s <= l:
107 while off + s <= l:
137 cur = data[off:off + s]
108 cur = data[off:off + s]
138 off += s
109 off += s
139 e = _unpack(indexformatv0, cur)
110 e = _unpack(indexformatv0, cur)
140 # transform to revlogv1 format
111 # transform to revlogv1 format
141 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
112 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
142 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
113 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
143 index.append(e2)
114 index.append(e2)
144 nodemap[e[6]] = n
115 nodemap[e[6]] = n
145 n += 1
116 n += 1
146
117
147 # add the magic null revision at -1
118 # add the magic null revision at -1
148 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
119 index.append((0, 0, 0, -1, -1, -1, -1, nullid))
149
120
150 return index, nodemap, None
121 return index, nodemap, None
151
122
152 def packentry(self, entry, node, version, rev):
123 def packentry(self, entry, node, version, rev):
153 if gettype(entry[0]):
124 if gettype(entry[0]):
154 raise RevlogError(_("index entry flags need RevlogNG"))
125 raise RevlogError(_("index entry flags need RevlogNG"))
155 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
126 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
156 node(entry[5]), node(entry[6]), entry[7])
127 node(entry[5]), node(entry[6]), entry[7])
157 return _pack(indexformatv0, *e2)
128 return _pack(indexformatv0, *e2)
158
129
159 # index ng:
130 # index ng:
160 # 6 bytes: offset
131 # 6 bytes: offset
161 # 2 bytes: flags
132 # 2 bytes: flags
162 # 4 bytes: compressed length
133 # 4 bytes: compressed length
163 # 4 bytes: uncompressed length
134 # 4 bytes: uncompressed length
164 # 4 bytes: base rev
135 # 4 bytes: base rev
165 # 4 bytes: link rev
136 # 4 bytes: link rev
166 # 4 bytes: parent 1 rev
137 # 4 bytes: parent 1 rev
167 # 4 bytes: parent 2 rev
138 # 4 bytes: parent 2 rev
168 # 32 bytes: nodeid
139 # 32 bytes: nodeid
169 indexformatng = ">Qiiiiii20s12x"
140 indexformatng = ">Qiiiiii20s12x"
170 ngshaoffset = 32
141 ngshaoffset = 32
171 versionformat = ">I"
142 versionformat = ">I"
172
143
173 class revlogio(object):
144 class revlogio(object):
174 def __init__(self):
145 def __init__(self):
175 self.size = struct.calcsize(indexformatng)
146 self.size = struct.calcsize(indexformatng)
176
147
177 def parseindex(self, data, inline):
148 def parseindex(self, data, inline):
178 # call the C implementation to parse the index data
149 # call the C implementation to parse the index data
179 index, cache = parsers.parse_index2(data, inline)
150 index, cache = parsers.parse_index2(data, inline)
180 return index, getattr(index, 'nodemap', None), cache
151 return index, getattr(index, 'nodemap', None), cache
181
152
182 def packentry(self, entry, node, version, rev):
153 def packentry(self, entry, node, version, rev):
183 p = _pack(indexformatng, *entry)
154 p = _pack(indexformatng, *entry)
184 if rev == 0:
155 if rev == 0:
185 p = _pack(versionformat, version) + p[4:]
156 p = _pack(versionformat, version) + p[4:]
186 return p
157 return p
187
158
188 class revlog(object):
159 class revlog(object):
189 """
160 """
190 the underlying revision storage object
161 the underlying revision storage object
191
162
192 A revlog consists of two parts, an index and the revision data.
163 A revlog consists of two parts, an index and the revision data.
193
164
194 The index is a file with a fixed record size containing
165 The index is a file with a fixed record size containing
195 information on each revision, including its nodeid (hash), the
166 information on each revision, including its nodeid (hash), the
196 nodeids of its parents, the position and offset of its data within
167 nodeids of its parents, the position and offset of its data within
197 the data file, and the revision it's based on. Finally, each entry
168 the data file, and the revision it's based on. Finally, each entry
198 contains a linkrev entry that can serve as a pointer to external
169 contains a linkrev entry that can serve as a pointer to external
199 data.
170 data.
200
171
201 The revision data itself is a linear collection of data chunks.
172 The revision data itself is a linear collection of data chunks.
202 Each chunk represents a revision and is usually represented as a
173 Each chunk represents a revision and is usually represented as a
203 delta against the previous chunk. To bound lookup time, runs of
174 delta against the previous chunk. To bound lookup time, runs of
204 deltas are limited to about 2 times the length of the original
175 deltas are limited to about 2 times the length of the original
205 version data. This makes retrieval of a version proportional to
176 version data. This makes retrieval of a version proportional to
206 its size, or O(1) relative to the number of revisions.
177 its size, or O(1) relative to the number of revisions.
207
178
208 Both pieces of the revlog are written to in an append-only
179 Both pieces of the revlog are written to in an append-only
209 fashion, which means we never need to rewrite a file to insert or
180 fashion, which means we never need to rewrite a file to insert or
210 remove data, and can use some simple techniques to avoid the need
181 remove data, and can use some simple techniques to avoid the need
211 for locking while reading.
182 for locking while reading.
212 """
183 """
213 def __init__(self, opener, indexfile):
184 def __init__(self, opener, indexfile):
214 """
185 """
215 create a revlog object
186 create a revlog object
216
187
217 opener is a function that abstracts the file opening operation
188 opener is a function that abstracts the file opening operation
218 and can be used to implement COW semantics or the like.
189 and can be used to implement COW semantics or the like.
219 """
190 """
220 self.indexfile = indexfile
191 self.indexfile = indexfile
221 self.datafile = indexfile[:-2] + ".d"
192 self.datafile = indexfile[:-2] + ".d"
222 self.opener = opener
193 self.opener = opener
223 self._cache = None
194 self._cache = None
224 self._basecache = (0, 0)
195 self._basecache = (0, 0)
225 self._chunkcache = (0, '')
196 self._chunkcache = (0, '')
226 self.index = []
197 self.index = []
227 self._pcache = {}
198 self._pcache = {}
228 self._nodecache = {nullid: nullrev}
199 self._nodecache = {nullid: nullrev}
229 self._nodepos = None
200 self._nodepos = None
230
201
231 v = REVLOG_DEFAULT_VERSION
202 v = REVLOG_DEFAULT_VERSION
232 opts = getattr(opener, 'options', None)
203 opts = getattr(opener, 'options', None)
233 if opts is not None:
204 if opts is not None:
234 if 'revlogv1' in opts:
205 if 'revlogv1' in opts:
235 if 'generaldelta' in opts:
206 if 'generaldelta' in opts:
236 v |= REVLOGGENERALDELTA
207 v |= REVLOGGENERALDELTA
237 else:
208 else:
238 v = 0
209 v = 0
239
210
240 i = ''
211 i = ''
241 self._initempty = True
212 self._initempty = True
242 try:
213 try:
243 f = self.opener(self.indexfile)
214 f = self.opener(self.indexfile)
244 i = f.read()
215 i = f.read()
245 f.close()
216 f.close()
246 if len(i) > 0:
217 if len(i) > 0:
247 v = struct.unpack(versionformat, i[:4])[0]
218 v = struct.unpack(versionformat, i[:4])[0]
248 self._initempty = False
219 self._initempty = False
249 except IOError, inst:
220 except IOError, inst:
250 if inst.errno != errno.ENOENT:
221 if inst.errno != errno.ENOENT:
251 raise
222 raise
252
223
253 self.version = v
224 self.version = v
254 self._inline = v & REVLOGNGINLINEDATA
225 self._inline = v & REVLOGNGINLINEDATA
255 self._generaldelta = v & REVLOGGENERALDELTA
226 self._generaldelta = v & REVLOGGENERALDELTA
256 flags = v & ~0xFFFF
227 flags = v & ~0xFFFF
257 fmt = v & 0xFFFF
228 fmt = v & 0xFFFF
258 if fmt == REVLOGV0 and flags:
229 if fmt == REVLOGV0 and flags:
259 raise RevlogError(_("index %s unknown flags %#04x for format v0")
230 raise RevlogError(_("index %s unknown flags %#04x for format v0")
260 % (self.indexfile, flags >> 16))
231 % (self.indexfile, flags >> 16))
261 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
232 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
262 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
233 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
263 % (self.indexfile, flags >> 16))
234 % (self.indexfile, flags >> 16))
264 elif fmt > REVLOGNG:
235 elif fmt > REVLOGNG:
265 raise RevlogError(_("index %s unknown format %d")
236 raise RevlogError(_("index %s unknown format %d")
266 % (self.indexfile, fmt))
237 % (self.indexfile, fmt))
267
238
268 self._io = revlogio()
239 self._io = revlogio()
269 if self.version == REVLOGV0:
240 if self.version == REVLOGV0:
270 self._io = revlogoldio()
241 self._io = revlogoldio()
271 try:
242 try:
272 d = self._io.parseindex(i, self._inline)
243 d = self._io.parseindex(i, self._inline)
273 except (ValueError, IndexError):
244 except (ValueError, IndexError):
274 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
245 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
275 self.index, nodemap, self._chunkcache = d
246 self.index, nodemap, self._chunkcache = d
276 if nodemap is not None:
247 if nodemap is not None:
277 self.nodemap = self._nodecache = nodemap
248 self.nodemap = self._nodecache = nodemap
278 if not self._chunkcache:
249 if not self._chunkcache:
279 self._chunkclear()
250 self._chunkclear()
280
251
281 def tip(self):
252 def tip(self):
282 return self.node(len(self.index) - 2)
253 return self.node(len(self.index) - 2)
283 def __len__(self):
254 def __len__(self):
284 return len(self.index) - 1
255 return len(self.index) - 1
285 def __iter__(self):
256 def __iter__(self):
286 for i in xrange(len(self)):
257 for i in xrange(len(self)):
287 yield i
258 yield i
288
259
289 @util.propertycache
260 @util.propertycache
290 def nodemap(self):
261 def nodemap(self):
291 self.rev(self.node(0))
262 self.rev(self.node(0))
292 return self._nodecache
263 return self._nodecache
293
264
294 def hasnode(self, node):
265 def hasnode(self, node):
295 try:
266 try:
296 self.rev(node)
267 self.rev(node)
297 return True
268 return True
298 except KeyError:
269 except KeyError:
299 return False
270 return False
300
271
301 def clearcaches(self):
272 def clearcaches(self):
302 try:
273 try:
303 self._nodecache.clearcaches()
274 self._nodecache.clearcaches()
304 except AttributeError:
275 except AttributeError:
305 self._nodecache = {nullid: nullrev}
276 self._nodecache = {nullid: nullrev}
306 self._nodepos = None
277 self._nodepos = None
307
278
308 def rev(self, node):
279 def rev(self, node):
309 try:
280 try:
310 return self._nodecache[node]
281 return self._nodecache[node]
311 except RevlogError:
282 except RevlogError:
312 # parsers.c radix tree lookup failed
283 # parsers.c radix tree lookup failed
313 raise LookupError(node, self.indexfile, _('no node'))
284 raise LookupError(node, self.indexfile, _('no node'))
314 except KeyError:
285 except KeyError:
315 # pure python cache lookup failed
286 # pure python cache lookup failed
316 n = self._nodecache
287 n = self._nodecache
317 i = self.index
288 i = self.index
318 p = self._nodepos
289 p = self._nodepos
319 if p is None:
290 if p is None:
320 p = len(i) - 2
291 p = len(i) - 2
321 for r in xrange(p, -1, -1):
292 for r in xrange(p, -1, -1):
322 v = i[r][7]
293 v = i[r][7]
323 n[v] = r
294 n[v] = r
324 if v == node:
295 if v == node:
325 self._nodepos = r - 1
296 self._nodepos = r - 1
326 return r
297 return r
327 raise LookupError(node, self.indexfile, _('no node'))
298 raise LookupError(node, self.indexfile, _('no node'))
328
299
329 def node(self, rev):
300 def node(self, rev):
330 return self.index[rev][7]
301 return self.index[rev][7]
331 def linkrev(self, rev):
302 def linkrev(self, rev):
332 return self.index[rev][4]
303 return self.index[rev][4]
333 def parents(self, node):
304 def parents(self, node):
334 i = self.index
305 i = self.index
335 d = i[self.rev(node)]
306 d = i[self.rev(node)]
336 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
307 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
337 def parentrevs(self, rev):
308 def parentrevs(self, rev):
338 return self.index[rev][5:7]
309 return self.index[rev][5:7]
339 def start(self, rev):
310 def start(self, rev):
340 return int(self.index[rev][0] >> 16)
311 return int(self.index[rev][0] >> 16)
341 def end(self, rev):
312 def end(self, rev):
342 return self.start(rev) + self.length(rev)
313 return self.start(rev) + self.length(rev)
343 def length(self, rev):
314 def length(self, rev):
344 return self.index[rev][1]
315 return self.index[rev][1]
345 def chainbase(self, rev):
316 def chainbase(self, rev):
346 index = self.index
317 index = self.index
347 base = index[rev][3]
318 base = index[rev][3]
348 while base != rev:
319 while base != rev:
349 rev = base
320 rev = base
350 base = index[rev][3]
321 base = index[rev][3]
351 return base
322 return base
352 def flags(self, rev):
323 def flags(self, rev):
353 return self.index[rev][0] & 0xFFFF
324 return self.index[rev][0] & 0xFFFF
354 def rawsize(self, rev):
325 def rawsize(self, rev):
355 """return the length of the uncompressed text for a given revision"""
326 """return the length of the uncompressed text for a given revision"""
356 l = self.index[rev][2]
327 l = self.index[rev][2]
357 if l >= 0:
328 if l >= 0:
358 return l
329 return l
359
330
360 t = self.revision(self.node(rev))
331 t = self.revision(self.node(rev))
361 return len(t)
332 return len(t)
362 size = rawsize
333 size = rawsize
363
334
364 def ancestors(self, revs, stoprev=0):
335 def ancestors(self, revs, stoprev=0):
365 """Generate the ancestors of 'revs' in reverse topological order.
336 """Generate the ancestors of 'revs' in reverse topological order.
366 Does not generate revs lower than stoprev.
337 Does not generate revs lower than stoprev.
367
338
368 Yield a sequence of revision numbers starting with the parents
339 Yield a sequence of revision numbers starting with the parents
369 of each revision in revs, i.e., each revision is *not* considered
340 of each revision in revs, i.e., each revision is *not* considered
370 an ancestor of itself. Results are in breadth-first order:
341 an ancestor of itself. Results are in breadth-first order:
371 parents of each rev in revs, then parents of those, etc. Result
342 parents of each rev in revs, then parents of those, etc. Result
372 does not include the null revision."""
343 does not include the null revision."""
373 visit = util.deque(revs)
344 visit = util.deque(revs)
374 seen = set([nullrev])
345 seen = set([nullrev])
375 while visit:
346 while visit:
376 for parent in self.parentrevs(visit.popleft()):
347 for parent in self.parentrevs(visit.popleft()):
377 if parent < stoprev:
348 if parent < stoprev:
378 continue
349 continue
379 if parent not in seen:
350 if parent not in seen:
380 visit.append(parent)
351 visit.append(parent)
381 seen.add(parent)
352 seen.add(parent)
382 yield parent
353 yield parent
383
354
384 def incancestors(self, revs, stoprev=0):
355 def incancestors(self, revs, stoprev=0):
385 """Identical to ancestors() except it also generates the
356 """Identical to ancestors() except it also generates the
386 revisions, 'revs'"""
357 revisions, 'revs'"""
387 for rev in revs:
358 for rev in revs:
388 yield rev
359 yield rev
389 for rev in self.ancestors(revs, stoprev):
360 for rev in self.ancestors(revs, stoprev):
390 yield rev
361 yield rev
391
362
392 def descendants(self, revs):
363 def descendants(self, revs):
393 """Generate the descendants of 'revs' in revision order.
364 """Generate the descendants of 'revs' in revision order.
394
365
395 Yield a sequence of revision numbers starting with a child of
366 Yield a sequence of revision numbers starting with a child of
396 some rev in revs, i.e., each revision is *not* considered a
367 some rev in revs, i.e., each revision is *not* considered a
397 descendant of itself. Results are ordered by revision number (a
368 descendant of itself. Results are ordered by revision number (a
398 topological sort)."""
369 topological sort)."""
399 first = min(revs)
370 first = min(revs)
400 if first == nullrev:
371 if first == nullrev:
401 for i in self:
372 for i in self:
402 yield i
373 yield i
403 return
374 return
404
375
405 seen = set(revs)
376 seen = set(revs)
406 for i in xrange(first + 1, len(self)):
377 for i in xrange(first + 1, len(self)):
407 for x in self.parentrevs(i):
378 for x in self.parentrevs(i):
408 if x != nullrev and x in seen:
379 if x != nullrev and x in seen:
409 seen.add(i)
380 seen.add(i)
410 yield i
381 yield i
411 break
382 break
412
383
413 def findcommonmissing(self, common=None, heads=None):
384 def findcommonmissing(self, common=None, heads=None):
414 """Return a tuple of the ancestors of common and the ancestors of heads
385 """Return a tuple of the ancestors of common and the ancestors of heads
415 that are not ancestors of common. In revset terminology, we return the
386 that are not ancestors of common. In revset terminology, we return the
416 tuple:
387 tuple:
417
388
418 ::common, (::heads) - (::common)
389 ::common, (::heads) - (::common)
419
390
420 The list is sorted by revision number, meaning it is
391 The list is sorted by revision number, meaning it is
421 topologically sorted.
392 topologically sorted.
422
393
423 'heads' and 'common' are both lists of node IDs. If heads is
394 'heads' and 'common' are both lists of node IDs. If heads is
424 not supplied, uses all of the revlog's heads. If common is not
395 not supplied, uses all of the revlog's heads. If common is not
425 supplied, uses nullid."""
396 supplied, uses nullid."""
426 if common is None:
397 if common is None:
427 common = [nullid]
398 common = [nullid]
428 if heads is None:
399 if heads is None:
429 heads = self.heads()
400 heads = self.heads()
430
401
431 common = [self.rev(n) for n in common]
402 common = [self.rev(n) for n in common]
432 heads = [self.rev(n) for n in heads]
403 heads = [self.rev(n) for n in heads]
433
404
434 # we want the ancestors, but inclusive
405 # we want the ancestors, but inclusive
435 has = set(self.ancestors(common))
406 has = set(self.ancestors(common))
436 has.add(nullrev)
407 has.add(nullrev)
437 has.update(common)
408 has.update(common)
438
409
439 # take all ancestors from heads that aren't in has
410 # take all ancestors from heads that aren't in has
440 missing = set()
411 missing = set()
441 visit = util.deque(r for r in heads if r not in has)
412 visit = util.deque(r for r in heads if r not in has)
442 while visit:
413 while visit:
443 r = visit.popleft()
414 r = visit.popleft()
444 if r in missing:
415 if r in missing:
445 continue
416 continue
446 else:
417 else:
447 missing.add(r)
418 missing.add(r)
448 for p in self.parentrevs(r):
419 for p in self.parentrevs(r):
449 if p not in has:
420 if p not in has:
450 visit.append(p)
421 visit.append(p)
451 missing = list(missing)
422 missing = list(missing)
452 missing.sort()
423 missing.sort()
453 return has, [self.node(r) for r in missing]
424 return has, [self.node(r) for r in missing]
454
425
455 def findmissing(self, common=None, heads=None):
426 def findmissing(self, common=None, heads=None):
456 """Return the ancestors of heads that are not ancestors of common.
427 """Return the ancestors of heads that are not ancestors of common.
457
428
458 More specifically, return a list of nodes N such that every N
429 More specifically, return a list of nodes N such that every N
459 satisfies the following constraints:
430 satisfies the following constraints:
460
431
461 1. N is an ancestor of some node in 'heads'
432 1. N is an ancestor of some node in 'heads'
462 2. N is not an ancestor of any node in 'common'
433 2. N is not an ancestor of any node in 'common'
463
434
464 The list is sorted by revision number, meaning it is
435 The list is sorted by revision number, meaning it is
465 topologically sorted.
436 topologically sorted.
466
437
467 'heads' and 'common' are both lists of node IDs. If heads is
438 'heads' and 'common' are both lists of node IDs. If heads is
468 not supplied, uses all of the revlog's heads. If common is not
439 not supplied, uses all of the revlog's heads. If common is not
469 supplied, uses nullid."""
440 supplied, uses nullid."""
470 _common, missing = self.findcommonmissing(common, heads)
441 _common, missing = self.findcommonmissing(common, heads)
471 return missing
442 return missing
472
443
473 def nodesbetween(self, roots=None, heads=None):
444 def nodesbetween(self, roots=None, heads=None):
474 """Return a topological path from 'roots' to 'heads'.
445 """Return a topological path from 'roots' to 'heads'.
475
446
476 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
447 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
477 topologically sorted list of all nodes N that satisfy both of
448 topologically sorted list of all nodes N that satisfy both of
478 these constraints:
449 these constraints:
479
450
480 1. N is a descendant of some node in 'roots'
451 1. N is a descendant of some node in 'roots'
481 2. N is an ancestor of some node in 'heads'
452 2. N is an ancestor of some node in 'heads'
482
453
483 Every node is considered to be both a descendant and an ancestor
454 Every node is considered to be both a descendant and an ancestor
484 of itself, so every reachable node in 'roots' and 'heads' will be
455 of itself, so every reachable node in 'roots' and 'heads' will be
485 included in 'nodes'.
456 included in 'nodes'.
486
457
487 'outroots' is the list of reachable nodes in 'roots', i.e., the
458 'outroots' is the list of reachable nodes in 'roots', i.e., the
488 subset of 'roots' that is returned in 'nodes'. Likewise,
459 subset of 'roots' that is returned in 'nodes'. Likewise,
489 'outheads' is the subset of 'heads' that is also in 'nodes'.
460 'outheads' is the subset of 'heads' that is also in 'nodes'.
490
461
491 'roots' and 'heads' are both lists of node IDs. If 'roots' is
462 'roots' and 'heads' are both lists of node IDs. If 'roots' is
492 unspecified, uses nullid as the only root. If 'heads' is
463 unspecified, uses nullid as the only root. If 'heads' is
493 unspecified, uses list of all of the revlog's heads."""
464 unspecified, uses list of all of the revlog's heads."""
494 nonodes = ([], [], [])
465 nonodes = ([], [], [])
495 if roots is not None:
466 if roots is not None:
496 roots = list(roots)
467 roots = list(roots)
497 if not roots:
468 if not roots:
498 return nonodes
469 return nonodes
499 lowestrev = min([self.rev(n) for n in roots])
470 lowestrev = min([self.rev(n) for n in roots])
500 else:
471 else:
501 roots = [nullid] # Everybody's a descendant of nullid
472 roots = [nullid] # Everybody's a descendant of nullid
502 lowestrev = nullrev
473 lowestrev = nullrev
503 if (lowestrev == nullrev) and (heads is None):
474 if (lowestrev == nullrev) and (heads is None):
504 # We want _all_ the nodes!
475 # We want _all_ the nodes!
505 return ([self.node(r) for r in self], [nullid], list(self.heads()))
476 return ([self.node(r) for r in self], [nullid], list(self.heads()))
506 if heads is None:
477 if heads is None:
507 # All nodes are ancestors, so the latest ancestor is the last
478 # All nodes are ancestors, so the latest ancestor is the last
508 # node.
479 # node.
509 highestrev = len(self) - 1
480 highestrev = len(self) - 1
510 # Set ancestors to None to signal that every node is an ancestor.
481 # Set ancestors to None to signal that every node is an ancestor.
511 ancestors = None
482 ancestors = None
512 # Set heads to an empty dictionary for later discovery of heads
483 # Set heads to an empty dictionary for later discovery of heads
513 heads = {}
484 heads = {}
514 else:
485 else:
515 heads = list(heads)
486 heads = list(heads)
516 if not heads:
487 if not heads:
517 return nonodes
488 return nonodes
518 ancestors = set()
489 ancestors = set()
519 # Turn heads into a dictionary so we can remove 'fake' heads.
490 # Turn heads into a dictionary so we can remove 'fake' heads.
520 # Also, later we will be using it to filter out the heads we can't
491 # Also, later we will be using it to filter out the heads we can't
521 # find from roots.
492 # find from roots.
522 heads = dict.fromkeys(heads, False)
493 heads = dict.fromkeys(heads, False)
523 # Start at the top and keep marking parents until we're done.
494 # Start at the top and keep marking parents until we're done.
524 nodestotag = set(heads)
495 nodestotag = set(heads)
525 # Remember where the top was so we can use it as a limit later.
496 # Remember where the top was so we can use it as a limit later.
526 highestrev = max([self.rev(n) for n in nodestotag])
497 highestrev = max([self.rev(n) for n in nodestotag])
527 while nodestotag:
498 while nodestotag:
528 # grab a node to tag
499 # grab a node to tag
529 n = nodestotag.pop()
500 n = nodestotag.pop()
530 # Never tag nullid
501 # Never tag nullid
531 if n == nullid:
502 if n == nullid:
532 continue
503 continue
533 # A node's revision number represents its place in a
504 # A node's revision number represents its place in a
534 # topologically sorted list of nodes.
505 # topologically sorted list of nodes.
535 r = self.rev(n)
506 r = self.rev(n)
536 if r >= lowestrev:
507 if r >= lowestrev:
537 if n not in ancestors:
508 if n not in ancestors:
538 # If we are possibly a descendant of one of the roots
509 # If we are possibly a descendant of one of the roots
539 # and we haven't already been marked as an ancestor
510 # and we haven't already been marked as an ancestor
540 ancestors.add(n) # Mark as ancestor
511 ancestors.add(n) # Mark as ancestor
541 # Add non-nullid parents to list of nodes to tag.
512 # Add non-nullid parents to list of nodes to tag.
542 nodestotag.update([p for p in self.parents(n) if
513 nodestotag.update([p for p in self.parents(n) if
543 p != nullid])
514 p != nullid])
544 elif n in heads: # We've seen it before, is it a fake head?
515 elif n in heads: # We've seen it before, is it a fake head?
545 # So it is, real heads should not be the ancestors of
516 # So it is, real heads should not be the ancestors of
546 # any other heads.
517 # any other heads.
547 heads.pop(n)
518 heads.pop(n)
548 if not ancestors:
519 if not ancestors:
549 return nonodes
520 return nonodes
550 # Now that we have our set of ancestors, we want to remove any
521 # Now that we have our set of ancestors, we want to remove any
551 # roots that are not ancestors.
522 # roots that are not ancestors.
552
523
553 # If one of the roots was nullid, everything is included anyway.
524 # If one of the roots was nullid, everything is included anyway.
554 if lowestrev > nullrev:
525 if lowestrev > nullrev:
555 # But, since we weren't, let's recompute the lowest rev to not
526 # But, since we weren't, let's recompute the lowest rev to not
556 # include roots that aren't ancestors.
527 # include roots that aren't ancestors.
557
528
558 # Filter out roots that aren't ancestors of heads
529 # Filter out roots that aren't ancestors of heads
559 roots = [n for n in roots if n in ancestors]
530 roots = [n for n in roots if n in ancestors]
560 # Recompute the lowest revision
531 # Recompute the lowest revision
561 if roots:
532 if roots:
562 lowestrev = min([self.rev(n) for n in roots])
533 lowestrev = min([self.rev(n) for n in roots])
563 else:
534 else:
564 # No more roots? Return empty list
535 # No more roots? Return empty list
565 return nonodes
536 return nonodes
566 else:
537 else:
567 # We are descending from nullid, and don't need to care about
538 # We are descending from nullid, and don't need to care about
568 # any other roots.
539 # any other roots.
569 lowestrev = nullrev
540 lowestrev = nullrev
570 roots = [nullid]
541 roots = [nullid]
571 # Transform our roots list into a set.
542 # Transform our roots list into a set.
572 descendants = set(roots)
543 descendants = set(roots)
573 # Also, keep the original roots so we can filter out roots that aren't
544 # Also, keep the original roots so we can filter out roots that aren't
574 # 'real' roots (i.e. are descended from other roots).
545 # 'real' roots (i.e. are descended from other roots).
575 roots = descendants.copy()
546 roots = descendants.copy()
576 # Our topologically sorted list of output nodes.
547 # Our topologically sorted list of output nodes.
577 orderedout = []
548 orderedout = []
578 # Don't start at nullid since we don't want nullid in our output list,
549 # Don't start at nullid since we don't want nullid in our output list,
579 # and if nullid shows up in descedents, empty parents will look like
550 # and if nullid shows up in descedents, empty parents will look like
580 # they're descendants.
551 # they're descendants.
581 for r in xrange(max(lowestrev, 0), highestrev + 1):
552 for r in xrange(max(lowestrev, 0), highestrev + 1):
582 n = self.node(r)
553 n = self.node(r)
583 isdescendant = False
554 isdescendant = False
584 if lowestrev == nullrev: # Everybody is a descendant of nullid
555 if lowestrev == nullrev: # Everybody is a descendant of nullid
585 isdescendant = True
556 isdescendant = True
586 elif n in descendants:
557 elif n in descendants:
587 # n is already a descendant
558 # n is already a descendant
588 isdescendant = True
559 isdescendant = True
589 # This check only needs to be done here because all the roots
560 # This check only needs to be done here because all the roots
590 # will start being marked is descendants before the loop.
561 # will start being marked is descendants before the loop.
591 if n in roots:
562 if n in roots:
592 # If n was a root, check if it's a 'real' root.
563 # If n was a root, check if it's a 'real' root.
593 p = tuple(self.parents(n))
564 p = tuple(self.parents(n))
594 # If any of its parents are descendants, it's not a root.
565 # If any of its parents are descendants, it's not a root.
595 if (p[0] in descendants) or (p[1] in descendants):
566 if (p[0] in descendants) or (p[1] in descendants):
596 roots.remove(n)
567 roots.remove(n)
597 else:
568 else:
598 p = tuple(self.parents(n))
569 p = tuple(self.parents(n))
599 # A node is a descendant if either of its parents are
570 # A node is a descendant if either of its parents are
600 # descendants. (We seeded the dependents list with the roots
571 # descendants. (We seeded the dependents list with the roots
601 # up there, remember?)
572 # up there, remember?)
602 if (p[0] in descendants) or (p[1] in descendants):
573 if (p[0] in descendants) or (p[1] in descendants):
603 descendants.add(n)
574 descendants.add(n)
604 isdescendant = True
575 isdescendant = True
605 if isdescendant and ((ancestors is None) or (n in ancestors)):
576 if isdescendant and ((ancestors is None) or (n in ancestors)):
606 # Only include nodes that are both descendants and ancestors.
577 # Only include nodes that are both descendants and ancestors.
607 orderedout.append(n)
578 orderedout.append(n)
608 if (ancestors is not None) and (n in heads):
579 if (ancestors is not None) and (n in heads):
609 # We're trying to figure out which heads are reachable
580 # We're trying to figure out which heads are reachable
610 # from roots.
581 # from roots.
611 # Mark this head as having been reached
582 # Mark this head as having been reached
612 heads[n] = True
583 heads[n] = True
613 elif ancestors is None:
584 elif ancestors is None:
614 # Otherwise, we're trying to discover the heads.
585 # Otherwise, we're trying to discover the heads.
615 # Assume this is a head because if it isn't, the next step
586 # Assume this is a head because if it isn't, the next step
616 # will eventually remove it.
587 # will eventually remove it.
617 heads[n] = True
588 heads[n] = True
618 # But, obviously its parents aren't.
589 # But, obviously its parents aren't.
619 for p in self.parents(n):
590 for p in self.parents(n):
620 heads.pop(p, None)
591 heads.pop(p, None)
621 heads = [n for n, flag in heads.iteritems() if flag]
592 heads = [n for n, flag in heads.iteritems() if flag]
622 roots = list(roots)
593 roots = list(roots)
623 assert orderedout
594 assert orderedout
624 assert roots
595 assert roots
625 assert heads
596 assert heads
626 return (orderedout, roots, heads)
597 return (orderedout, roots, heads)
627
598
628 def headrevs(self):
599 def headrevs(self):
629 try:
600 try:
630 return self.index.headrevs()
601 return self.index.headrevs()
631 except AttributeError:
602 except AttributeError:
632 pass
603 pass
633 count = len(self)
604 count = len(self)
634 if not count:
605 if not count:
635 return [nullrev]
606 return [nullrev]
636 ishead = [1] * (count + 1)
607 ishead = [1] * (count + 1)
637 index = self.index
608 index = self.index
638 for r in xrange(count):
609 for r in xrange(count):
639 e = index[r]
610 e = index[r]
640 ishead[e[5]] = ishead[e[6]] = 0
611 ishead[e[5]] = ishead[e[6]] = 0
641 return [r for r in xrange(count) if ishead[r]]
612 return [r for r in xrange(count) if ishead[r]]
642
613
643 def heads(self, start=None, stop=None):
614 def heads(self, start=None, stop=None):
644 """return the list of all nodes that have no children
615 """return the list of all nodes that have no children
645
616
646 if start is specified, only heads that are descendants of
617 if start is specified, only heads that are descendants of
647 start will be returned
618 start will be returned
648 if stop is specified, it will consider all the revs from stop
619 if stop is specified, it will consider all the revs from stop
649 as if they had no children
620 as if they had no children
650 """
621 """
651 if start is None and stop is None:
622 if start is None and stop is None:
652 if not len(self):
623 if not len(self):
653 return [nullid]
624 return [nullid]
654 return [self.node(r) for r in self.headrevs()]
625 return [self.node(r) for r in self.headrevs()]
655
626
656 if start is None:
627 if start is None:
657 start = nullid
628 start = nullid
658 if stop is None:
629 if stop is None:
659 stop = []
630 stop = []
660 stoprevs = set([self.rev(n) for n in stop])
631 stoprevs = set([self.rev(n) for n in stop])
661 startrev = self.rev(start)
632 startrev = self.rev(start)
662 reachable = set((startrev,))
633 reachable = set((startrev,))
663 heads = set((startrev,))
634 heads = set((startrev,))
664
635
665 parentrevs = self.parentrevs
636 parentrevs = self.parentrevs
666 for r in xrange(startrev + 1, len(self)):
637 for r in xrange(startrev + 1, len(self)):
667 for p in parentrevs(r):
638 for p in parentrevs(r):
668 if p in reachable:
639 if p in reachable:
669 if r not in stoprevs:
640 if r not in stoprevs:
670 reachable.add(r)
641 reachable.add(r)
671 heads.add(r)
642 heads.add(r)
672 if p in heads and p not in stoprevs:
643 if p in heads and p not in stoprevs:
673 heads.remove(p)
644 heads.remove(p)
674
645
675 return [self.node(r) for r in heads]
646 return [self.node(r) for r in heads]
676
647
677 def children(self, node):
648 def children(self, node):
678 """find the children of a given node"""
649 """find the children of a given node"""
679 c = []
650 c = []
680 p = self.rev(node)
651 p = self.rev(node)
681 for r in range(p + 1, len(self)):
652 for r in range(p + 1, len(self)):
682 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
653 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
683 if prevs:
654 if prevs:
684 for pr in prevs:
655 for pr in prevs:
685 if pr == p:
656 if pr == p:
686 c.append(self.node(r))
657 c.append(self.node(r))
687 elif p == nullrev:
658 elif p == nullrev:
688 c.append(self.node(r))
659 c.append(self.node(r))
689 return c
660 return c
690
661
691 def descendant(self, start, end):
662 def descendant(self, start, end):
692 if start == nullrev:
663 if start == nullrev:
693 return True
664 return True
694 for i in self.descendants([start]):
665 for i in self.descendants([start]):
695 if i == end:
666 if i == end:
696 return True
667 return True
697 elif i > end:
668 elif i > end:
698 break
669 break
699 return False
670 return False
700
671
701 def ancestor(self, a, b):
672 def ancestor(self, a, b):
702 """calculate the least common ancestor of nodes a and b"""
673 """calculate the least common ancestor of nodes a and b"""
703
674
704 # fast path, check if it is a descendant
675 # fast path, check if it is a descendant
705 a, b = self.rev(a), self.rev(b)
676 a, b = self.rev(a), self.rev(b)
706 start, end = sorted((a, b))
677 start, end = sorted((a, b))
707 if self.descendant(start, end):
678 if self.descendant(start, end):
708 return self.node(start)
679 return self.node(start)
709
680
710 def parents(rev):
681 def parents(rev):
711 return [p for p in self.parentrevs(rev) if p != nullrev]
682 return [p for p in self.parentrevs(rev) if p != nullrev]
712
683
713 c = ancestor.ancestor(a, b, parents)
684 c = ancestor.ancestor(a, b, parents)
714 if c is None:
685 if c is None:
715 return nullid
686 return nullid
716
687
717 return self.node(c)
688 return self.node(c)
718
689
719 def _match(self, id):
690 def _match(self, id):
720 if isinstance(id, int):
691 if isinstance(id, int):
721 # rev
692 # rev
722 return self.node(id)
693 return self.node(id)
723 if len(id) == 20:
694 if len(id) == 20:
724 # possibly a binary node
695 # possibly a binary node
725 # odds of a binary node being all hex in ASCII are 1 in 10**25
696 # odds of a binary node being all hex in ASCII are 1 in 10**25
726 try:
697 try:
727 node = id
698 node = id
728 self.rev(node) # quick search the index
699 self.rev(node) # quick search the index
729 return node
700 return node
730 except LookupError:
701 except LookupError:
731 pass # may be partial hex id
702 pass # may be partial hex id
732 try:
703 try:
733 # str(rev)
704 # str(rev)
734 rev = int(id)
705 rev = int(id)
735 if str(rev) != id:
706 if str(rev) != id:
736 raise ValueError
707 raise ValueError
737 if rev < 0:
708 if rev < 0:
738 rev = len(self) + rev
709 rev = len(self) + rev
739 if rev < 0 or rev >= len(self):
710 if rev < 0 or rev >= len(self):
740 raise ValueError
711 raise ValueError
741 return self.node(rev)
712 return self.node(rev)
742 except (ValueError, OverflowError):
713 except (ValueError, OverflowError):
743 pass
714 pass
744 if len(id) == 40:
715 if len(id) == 40:
745 try:
716 try:
746 # a full hex nodeid?
717 # a full hex nodeid?
747 node = bin(id)
718 node = bin(id)
748 self.rev(node)
719 self.rev(node)
749 return node
720 return node
750 except (TypeError, LookupError):
721 except (TypeError, LookupError):
751 pass
722 pass
752
723
753 def _partialmatch(self, id):
724 def _partialmatch(self, id):
754 try:
725 try:
755 return self.index.partialmatch(id)
726 return self.index.partialmatch(id)
756 except RevlogError:
727 except RevlogError:
757 # parsers.c radix tree lookup gave multiple matches
728 # parsers.c radix tree lookup gave multiple matches
758 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
729 raise LookupError(id, self.indexfile, _("ambiguous identifier"))
759 except (AttributeError, ValueError):
730 except (AttributeError, ValueError):
760 # we are pure python, or key was too short to search radix tree
731 # we are pure python, or key was too short to search radix tree
761 pass
732 pass
762
733
763 if id in self._pcache:
734 if id in self._pcache:
764 return self._pcache[id]
735 return self._pcache[id]
765
736
766 if len(id) < 40:
737 if len(id) < 40:
767 try:
738 try:
768 # hex(node)[:...]
739 # hex(node)[:...]
769 l = len(id) // 2 # grab an even number of digits
740 l = len(id) // 2 # grab an even number of digits
770 prefix = bin(id[:l * 2])
741 prefix = bin(id[:l * 2])
771 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
742 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
772 nl = [n for n in nl if hex(n).startswith(id)]
743 nl = [n for n in nl if hex(n).startswith(id)]
773 if len(nl) > 0:
744 if len(nl) > 0:
774 if len(nl) == 1:
745 if len(nl) == 1:
775 self._pcache[id] = nl[0]
746 self._pcache[id] = nl[0]
776 return nl[0]
747 return nl[0]
777 raise LookupError(id, self.indexfile,
748 raise LookupError(id, self.indexfile,
778 _('ambiguous identifier'))
749 _('ambiguous identifier'))
779 return None
750 return None
780 except TypeError:
751 except TypeError:
781 pass
752 pass
782
753
783 def lookup(self, id):
754 def lookup(self, id):
784 """locate a node based on:
755 """locate a node based on:
785 - revision number or str(revision number)
756 - revision number or str(revision number)
786 - nodeid or subset of hex nodeid
757 - nodeid or subset of hex nodeid
787 """
758 """
788 n = self._match(id)
759 n = self._match(id)
789 if n is not None:
760 if n is not None:
790 return n
761 return n
791 n = self._partialmatch(id)
762 n = self._partialmatch(id)
792 if n:
763 if n:
793 return n
764 return n
794
765
795 raise LookupError(id, self.indexfile, _('no match found'))
766 raise LookupError(id, self.indexfile, _('no match found'))
796
767
797 def cmp(self, node, text):
768 def cmp(self, node, text):
798 """compare text with a given file revision
769 """compare text with a given file revision
799
770
800 returns True if text is different than what is stored.
771 returns True if text is different than what is stored.
801 """
772 """
802 p1, p2 = self.parents(node)
773 p1, p2 = self.parents(node)
803 return hash(text, p1, p2) != node
774 return hash(text, p1, p2) != node
804
775
805 def _addchunk(self, offset, data):
776 def _addchunk(self, offset, data):
806 o, d = self._chunkcache
777 o, d = self._chunkcache
807 # try to add to existing cache
778 # try to add to existing cache
808 if o + len(d) == offset and len(d) + len(data) < _chunksize:
779 if o + len(d) == offset and len(d) + len(data) < _chunksize:
809 self._chunkcache = o, d + data
780 self._chunkcache = o, d + data
810 else:
781 else:
811 self._chunkcache = offset, data
782 self._chunkcache = offset, data
812
783
813 def _loadchunk(self, offset, length):
784 def _loadchunk(self, offset, length):
814 if self._inline:
785 if self._inline:
815 df = self.opener(self.indexfile)
786 df = self.opener(self.indexfile)
816 else:
787 else:
817 df = self.opener(self.datafile)
788 df = self.opener(self.datafile)
818
789
819 readahead = max(65536, length)
790 readahead = max(65536, length)
820 df.seek(offset)
791 df.seek(offset)
821 d = df.read(readahead)
792 d = df.read(readahead)
822 df.close()
793 df.close()
823 self._addchunk(offset, d)
794 self._addchunk(offset, d)
824 if readahead > length:
795 if readahead > length:
825 return util.buffer(d, 0, length)
796 return util.buffer(d, 0, length)
826 return d
797 return d
827
798
828 def _getchunk(self, offset, length):
799 def _getchunk(self, offset, length):
829 o, d = self._chunkcache
800 o, d = self._chunkcache
830 l = len(d)
801 l = len(d)
831
802
832 # is it in the cache?
803 # is it in the cache?
833 cachestart = offset - o
804 cachestart = offset - o
834 cacheend = cachestart + length
805 cacheend = cachestart + length
835 if cachestart >= 0 and cacheend <= l:
806 if cachestart >= 0 and cacheend <= l:
836 if cachestart == 0 and cacheend == l:
807 if cachestart == 0 and cacheend == l:
837 return d # avoid a copy
808 return d # avoid a copy
838 return util.buffer(d, cachestart, cacheend - cachestart)
809 return util.buffer(d, cachestart, cacheend - cachestart)
839
810
840 return self._loadchunk(offset, length)
811 return self._loadchunk(offset, length)
841
812
842 def _chunkraw(self, startrev, endrev):
813 def _chunkraw(self, startrev, endrev):
843 start = self.start(startrev)
814 start = self.start(startrev)
844 length = self.end(endrev) - start
815 length = self.end(endrev) - start
845 if self._inline:
816 if self._inline:
846 start += (startrev + 1) * self._io.size
817 start += (startrev + 1) * self._io.size
847 return self._getchunk(start, length)
818 return self._getchunk(start, length)
848
819
849 def _chunk(self, rev):
820 def _chunk(self, rev):
850 return decompress(self._chunkraw(rev, rev))
821 return decompress(self._chunkraw(rev, rev))
851
822
852 def _chunkbase(self, rev):
823 def _chunkbase(self, rev):
853 return self._chunk(rev)
824 return self._chunk(rev)
854
825
855 def _chunkclear(self):
826 def _chunkclear(self):
856 self._chunkcache = (0, '')
827 self._chunkcache = (0, '')
857
828
858 def deltaparent(self, rev):
829 def deltaparent(self, rev):
859 """return deltaparent of the given revision"""
830 """return deltaparent of the given revision"""
860 base = self.index[rev][3]
831 base = self.index[rev][3]
861 if base == rev:
832 if base == rev:
862 return nullrev
833 return nullrev
863 elif self._generaldelta:
834 elif self._generaldelta:
864 return base
835 return base
865 else:
836 else:
866 return rev - 1
837 return rev - 1
867
838
868 def revdiff(self, rev1, rev2):
839 def revdiff(self, rev1, rev2):
869 """return or calculate a delta between two revisions"""
840 """return or calculate a delta between two revisions"""
870 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
841 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
871 return str(self._chunk(rev2))
842 return str(self._chunk(rev2))
872
843
873 return mdiff.textdiff(self.revision(rev1),
844 return mdiff.textdiff(self.revision(rev1),
874 self.revision(rev2))
845 self.revision(rev2))
875
846
876 def revision(self, nodeorrev):
847 def revision(self, nodeorrev):
877 """return an uncompressed revision of a given node or revision
848 """return an uncompressed revision of a given node or revision
878 number.
849 number.
879 """
850 """
880 if isinstance(nodeorrev, int):
851 if isinstance(nodeorrev, int):
881 rev = nodeorrev
852 rev = nodeorrev
882 node = self.node(rev)
853 node = self.node(rev)
883 else:
854 else:
884 node = nodeorrev
855 node = nodeorrev
885 rev = None
856 rev = None
886
857
887 cachedrev = None
858 cachedrev = None
888 if node == nullid:
859 if node == nullid:
889 return ""
860 return ""
890 if self._cache:
861 if self._cache:
891 if self._cache[0] == node:
862 if self._cache[0] == node:
892 return self._cache[2]
863 return self._cache[2]
893 cachedrev = self._cache[1]
864 cachedrev = self._cache[1]
894
865
895 # look up what we need to read
866 # look up what we need to read
896 text = None
867 text = None
897 if rev is None:
868 if rev is None:
898 rev = self.rev(node)
869 rev = self.rev(node)
899
870
900 # check rev flags
871 # check rev flags
901 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
872 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
902 raise RevlogError(_('incompatible revision flag %x') %
873 raise RevlogError(_('incompatible revision flag %x') %
903 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
874 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
904
875
905 # build delta chain
876 # build delta chain
906 chain = []
877 chain = []
907 index = self.index # for performance
878 index = self.index # for performance
908 generaldelta = self._generaldelta
879 generaldelta = self._generaldelta
909 iterrev = rev
880 iterrev = rev
910 e = index[iterrev]
881 e = index[iterrev]
911 while iterrev != e[3] and iterrev != cachedrev:
882 while iterrev != e[3] and iterrev != cachedrev:
912 chain.append(iterrev)
883 chain.append(iterrev)
913 if generaldelta:
884 if generaldelta:
914 iterrev = e[3]
885 iterrev = e[3]
915 else:
886 else:
916 iterrev -= 1
887 iterrev -= 1
917 e = index[iterrev]
888 e = index[iterrev]
918 chain.reverse()
889 chain.reverse()
919 base = iterrev
890 base = iterrev
920
891
921 if iterrev == cachedrev:
892 if iterrev == cachedrev:
922 # cache hit
893 # cache hit
923 text = self._cache[2]
894 text = self._cache[2]
924
895
925 # drop cache to save memory
896 # drop cache to save memory
926 self._cache = None
897 self._cache = None
927
898
928 self._chunkraw(base, rev)
899 self._chunkraw(base, rev)
929 if text is None:
900 if text is None:
930 text = str(self._chunkbase(base))
901 text = str(self._chunkbase(base))
931
902
932 bins = [self._chunk(r) for r in chain]
903 bins = [self._chunk(r) for r in chain]
933 text = mdiff.patches(text, bins)
904 text = mdiff.patches(text, bins)
934
905
935 text = self._checkhash(text, node, rev)
906 text = self._checkhash(text, node, rev)
936
907
937 self._cache = (node, rev, text)
908 self._cache = (node, rev, text)
938 return text
909 return text
939
910
940 def _checkhash(self, text, node, rev):
911 def _checkhash(self, text, node, rev):
941 p1, p2 = self.parents(node)
912 p1, p2 = self.parents(node)
942 if node != hash(text, p1, p2):
913 if node != hash(text, p1, p2):
943 raise RevlogError(_("integrity check failed on %s:%d")
914 raise RevlogError(_("integrity check failed on %s:%d")
944 % (self.indexfile, rev))
915 % (self.indexfile, rev))
945 return text
916 return text
946
917
947 def checkinlinesize(self, tr, fp=None):
918 def checkinlinesize(self, tr, fp=None):
948 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
919 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
949 return
920 return
950
921
951 trinfo = tr.find(self.indexfile)
922 trinfo = tr.find(self.indexfile)
952 if trinfo is None:
923 if trinfo is None:
953 raise RevlogError(_("%s not found in the transaction")
924 raise RevlogError(_("%s not found in the transaction")
954 % self.indexfile)
925 % self.indexfile)
955
926
956 trindex = trinfo[2]
927 trindex = trinfo[2]
957 dataoff = self.start(trindex)
928 dataoff = self.start(trindex)
958
929
959 tr.add(self.datafile, dataoff)
930 tr.add(self.datafile, dataoff)
960
931
961 if fp:
932 if fp:
962 fp.flush()
933 fp.flush()
963 fp.close()
934 fp.close()
964
935
        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

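    # A hedged usage sketch (all names hypothetical): adding the same text
    # with the same parents twice is a no-op, because the node hash is
    # already present in self.nodemap:
    #
    #   n1 = rl.addrevision("data", tr, linkrev, p1node, p2node)
    #   n2 = rl.addrevision("data", tr, linkrev, p1node, p2node)
    #   assert n1 == n2
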
    def compress(self, text):
        """ generate a possibly-compressed representation of text """
        if not text:
            return ("", text)
        l = len(text)
        bin = None
        if l < 44:
            pass
        elif l > 1000000:
            # zlib makes an internal copy, thus doubling memory usage for
            # large files, so let's do this in pieces
            z = zlib.compressobj()
            p = []
            pos = 0
            while pos < l:
                pos2 = pos + 2**20
                p.append(z.compress(text[pos:pos2]))
                pos = pos2
            p.append(z.flush())
            if sum(map(len, p)) < l:
                bin = "".join(p)
        else:
            bin = _compress(text)
        if bin is None or len(bin) > l:
            if text[0] == '\0':
                return ("", text)
            return ('u', text)
        return ("", bin)
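
    # A hedged sketch of the three possible return shapes, for some
    # hypothetical revlog instance 'rl' (exact compressed bytes depend on
    # the zlib version):
    #
    #   rl.compress('')           -> ('', '')        empty: pass through
    #   rl.compress('x' * 20)     -> ('u', 'xx...')  short or incompressible
    #                                                text, marked with 'u'
    #   rl.compress('x' * 4096)   -> ('', 'x\x9c...') zlib-compressed data
    #
    # text whose first byte is NUL is stored as-is with no 'u' marker: a
    # NUL can never begin a zlib stream, so it is already self-marking.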

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.
        invariants:
        - text is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
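        # btext is a one-element list so that buildtext(), below, can
        # rebind its contents from the enclosing scope (Python 2 has no
        # 'nonlocal' statement)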
        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = self.compress(delta)
            l = len(data[1]) + len(data[0])
            if basecache[0] == rev:
                chainbase = basecache[1]
            else:
                chainbase = self.chainbase(rev)
            dist = l + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            return dist, l, data, base, chainbase
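
        # dist, as computed above, approximates how many bytes must be
        # read to reconstruct this revision: the new compressed delta plus
        # everything stored since the start of its chain's base revision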

        curr = len(self)
        prev = curr - 1
        base = chainbase = curr
        offset = self.end(prev)
        flags = 0
        d = None
        basecache = self._basecache
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            if self._generaldelta:
                if p1r >= basecache[1]:
                    d = builddelta(p1r)
                elif p2r >= basecache[1]:
                    d = builddelta(p2r)
                else:
                    d = builddelta(prev)
            else:
                d = builddelta(prev)
            dist, l, data, base, chainbase = d
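
        # with generaldelta, a parent was preferred as the delta base
        # above, but only if it is no older than the chain base of the
        # last revision written (basecache[1]); otherwise we fall back
        # to deltaing against prev, keeping chains from reaching far
        # back into the file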

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if d is None or dist > textlen * 2:
            text = buildtext()
            data = self.compress(text)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        self._basecache = (curr, chainbase)
        return node

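    # For reference, a hedged summary of the index entry tuple that
    # _addrevision assembles above:
    #
    #   (offset_type(offset, flags),  # data file offset << 16 | flags
    #    l,                           # length of compressed data on disk
    #    textlen,                     # length of the full text
    #    base,                        # delta base revision
    #    link,                        # linkrev
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # node id
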
    def group(self, nodelist, bundler, reorder=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0]; the receiver is
        guaranteed to have this parent, as it has all history before
        these changesets. If firstparent is nullrev, the changegroup
        starts with a full revision.
        """

        # if we don't have any revisions touched by these changesets, bail
        if len(nodelist) == 0:
            yield bundler.close()
            return

        # for generaldelta revlogs, we linearize the revs; this will both be
        # much quicker and generate a much smaller bundle
        if (self._generaldelta and reorder is not False) or reorder:
            dag = dagutil.revlogdag(self)
            revs = set(self.rev(n) for n in nodelist)
            revs = dag.linearize(revs)
        else:
            revs = sorted([self.rev(n) for n in nodelist])

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for r in xrange(len(revs) - 1):
            prev, curr = revs[r], revs[r + 1]
            for c in bundler.revchunk(self, curr, prev):
                yield c

        yield bundler.close()

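    # A hedged consumption sketch (bundler and nodes hypothetical):
    # group() is a generator, so chunks can be streamed to the wire or a
    # file without buffering the whole bundle:
    #
    #   for chunk in rl.group(nodes, bundler):
    #       outfile.write(chunk)
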
    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.deltachunk(chain)
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
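                # each chunk from the bundle names its own parents, the
                # changeset it belongs to ('cs', which becomes the linkrev
                # via linkmapper) and the base its delta applies to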

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (baserev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return
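
        # the for/else falls through to the bare 'return' only when the
        # loop finds no revision with linkrev >= minlink (index field 4
        # is the linkrev), i.e. when there is nothing to strip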

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
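        # the deletion stops at -1 so the sentinel record at the end of
        # the index survives the truncation; _addrevision's
        # index.insert(-1, e) relies on the same sentinel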
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

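    # checksize() is a consistency probe: dd and di are the number of
    # surplus (or, if negative, missing) bytes in the data and index
    # files compared to what the index entries predict. A hedged usage
    # sketch, assuming a hypothetical revlog instance 'rl':
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       print "%s: data off by %d, index off by %d" % (
    #           rl.indexfile, dd, di)
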
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res